diff --git a/gen-ignores.py b/gen-ignores.py index e0dae51eac4a7cb40a6a8f11d070f51f72edbac1..cd372f841458ee803955e6b33d757ed497e89952 100755 --- a/gen-ignores.py +++ b/gen-ignores.py @@ -20,7 +20,7 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')): with open(fn) as fd: print('\n\t// %s' % os.path.basename(fn)) for p in json.load(fd)['patterns']: - if re.search(r'\\[0-9]', p) or '(?!' in p: + if re.search(r'\\[0-9]', p) or ('(?!' in p) or ('(?=' in p): # RE2 does not support backreferences or other # fancy PCRE constructs. This excludes <10 # patterns from the ignore list. diff --git a/ignore_patterns.go b/ignore_patterns.go index 319b294823aa7b9327a33e5e8d5a1023a84db675..b44476c8b9f20828059abaf082c5cba77f426043 100644 --- a/ignore_patterns.go +++ b/ignore_patterns.go @@ -146,7 +146,6 @@ var defaultIgnorePatterns = []string{ "/discover\\?((.*&)?filtertype(_\\d+)?=){2}", "/search-filter\\?(.*&)?filtertype(_\\d+)?=", "/simple-search\\?((.*&)?(filter_type(_\\d+)?|filtertype)=){2}", - "/simple-search\\?(?=(.*&)?(filter_type(_\\d+)?|filtertype)=)(.*&)?(author|subject|dateIssued)_page=", "[?&]dateIssued_page=\\d{2,}(&|$)", "[?&]starts_with=",