Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • better-queue
  • master
2 results

Target

Select target project
  • ale/crawl
1 result
Select Git revision
  • better-queue
  • master
2 results
Show changes
Commits on Source (3)
include: "https://git.autistici.org/ai3/build-deb/raw/master/ci-common.yml" include: "https://git.autistici.org/ai3/build-deb/raw/master/ci-common.yml"
stages:
- test
- build_pkgsrc
- build_pkg
- upload_pkg
run_tests: run_tests:
stage: test stage: test
-image: "golang:latest"
+image: "golang:1.19"
script: "go test -v ./..." script: "go test -v ./..."
...@@ -20,7 +20,7 @@ import ( ...@@ -20,7 +20,7 @@ import (
lutil "github.com/syndtr/goleveldb/leveldb/util" lutil "github.com/syndtr/goleveldb/leveldb/util"
) )
-var errorRetryDelay = 180 * time.Second
+var errorRetryDelay = 12 * time.Hour
type gobDB struct { type gobDB struct {
*leveldb.DB *leveldb.DB
...@@ -188,6 +188,7 @@ func (c *Crawler) Enqueue(link Outlink, depth int) error { ...@@ -188,6 +188,7 @@ func (c *Crawler) Enqueue(link Outlink, depth int) error {
// See if it's in scope. // See if it's in scope.
if !c.scope.Check(link, depth) { if !c.scope.Check(link, depth) {
log.Printf("%s is not in scope", link.URL)
return nil return nil
} }
......
...@@ -13,19 +13,19 @@ import os ...@@ -13,19 +13,19 @@ import os
import sys import sys
archivebot_ignore_path = sys.argv[1] archivebot_ignore_path = sys.argv[1]
-print 'package crawl\n\nvar defaultIgnorePatterns = []string{'
+print('package crawl\n\nvar defaultIgnorePatterns = []string{')
for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')): for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
try: try:
with open(fn) as fd: with open(fn) as fd:
-print '\n\t// %s' % os.path.basename(fn)
+print('\n\t// %s' % os.path.basename(fn))
for p in json.load(fd)['patterns']: for p in json.load(fd)['patterns']:
if '\\\\1' in p or '(?!' in p: if '\\\\1' in p or '(?!' in p:
# RE2 does not support backreferences or other # RE2 does not support backreferences or other
# fancy PCRE constructs. This excludes <10 # fancy PCRE constructs. This excludes <10
# patterns from the ignore list. # patterns from the ignore list.
continue continue
-print '\t%s,' % json.dumps(p)
-except Exception, e:
-print >>sys.stderr, 'error in %s: %s' % (fn, e)
-print '}'
+p = p.replace('{primary_netloc}', '.*')
+print('\t%s,' % json.dumps(p))
+except Exception as e:
+print('error in %s: %s' % (fn, e), file=sys.stderr)
+print('}')
This diff is collapsed.