From 66f058c05b75e0e0031cb8d0a2994598724b914c Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Mon, 5 Dec 2022 20:55:05 +0000
Subject: [PATCH] Ignore patterns with unsupported Perl regex constructs

---
 gen-ignores.py     | 2 +-
 ignore_patterns.go | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gen-ignores.py b/gen-ignores.py
index e0dae51..cd372f8 100755
--- a/gen-ignores.py
+++ b/gen-ignores.py
@@ -20,7 +20,7 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
     with open(fn) as fd:
         print('\n\t// %s' % os.path.basename(fn))
         for p in json.load(fd)['patterns']:
-            if re.search(r'\\[0-9]', p) or '(?!' in p:
+            if re.search(r'\\[0-9]', p) or ('(?!' in p) or ('(?=' in p):
                 # RE2 does not support backreferences or other
                 # fancy PCRE constructs. This excludes <10
                 # patterns from the ignore list.
diff --git a/ignore_patterns.go b/ignore_patterns.go
index 319b294..b44476c 100644
--- a/ignore_patterns.go
+++ b/ignore_patterns.go
@@ -146,7 +146,6 @@ var defaultIgnorePatterns = []string{
 	"/discover\\?((.*&)?filtertype(_\\d+)?=){2}",
 	"/search-filter\\?(.*&)?filtertype(_\\d+)?=",
 	"/simple-search\\?((.*&)?(filter_type(_\\d+)?|filtertype)=){2}",
-	"/simple-search\\?(?=(.*&)?(filter_type(_\\d+)?|filtertype)=)(.*&)?(author|subject|dateIssued)_page=",
 	"[?&]dateIssued_page=\\d{2,}(&|$)",
 	"[?&]starts_with=",
 
-- 
GitLab