Skip to content
Snippets Groups Projects
Commit 179eb138 authored by ale
Browse files

Update gen-ignores to py3, and regenerate ignore list

parent fa3a9fb8
No related branches found
No related tags found
No related merge requests found
......@@ -13,19 +13,19 @@ import os
import sys
# Directory containing the ignore-set JSON files, taken from the first
# command-line argument.
archivebot_ignore_path = sys.argv[1]

# Write a Go source file to stdout that declares defaultIgnorePatterns as a
# string slice, with one entry per usable pattern found in the JSON files.
print('package crawl\n\nvar defaultIgnorePatterns = []string{')
for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
    try:
        with open(fn) as fd:
            # Go comment naming the file the following patterns came from.
            print('\n\t// %s' % os.path.basename(fn))
            for p in json.load(fd)['patterns']:
                if '\\\\1' in p or '(?!' in p:
                    # RE2 does not support backreferences or other
                    # fancy PCRE constructs. This excludes <10
                    # patterns from the ignore list.
                    continue
                # The '{primary_netloc}' placeholder is replaced with a
                # match-anything regex before the pattern is emitted.
                p = p.replace('{primary_netloc}', '.*')
                # json.dumps produces a double-quoted, escaped literal.
                print('\t%s,' % json.dumps(p))
    except Exception as e:
        # Best effort: report the broken file on stderr and keep going so
        # the remaining files are still processed.
        print('error in %s: %s' % (fn, e), file=sys.stderr)
print('}')
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment