From 66f058c05b75e0e0031cb8d0a2994598724b914c Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Mon, 5 Dec 2022 20:55:05 +0000
Subject: [PATCH] Ignore patterns with unsupported Perl regex constructs

---
 gen-ignores.py     | 2 +-
 ignore_patterns.go | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gen-ignores.py b/gen-ignores.py
index e0dae51..cd372f8 100755
--- a/gen-ignores.py
+++ b/gen-ignores.py
@@ -20,7 +20,7 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
         with open(fn) as fd:
             print('\n\t// %s' % os.path.basename(fn))
             for p in json.load(fd)['patterns']:
-                if re.search(r'\\[0-9]', p) or '(?!' in p:
+                if re.search(r'\\[0-9]', p) or ('(?!' in p) or ('(?=' in p):
                     # RE2 does not support backreferences or other
                     # fancy PCRE constructs. This excludes <10
                     # patterns from the ignore list.
diff --git a/ignore_patterns.go b/ignore_patterns.go
index 319b294..b44476c 100644
--- a/ignore_patterns.go
+++ b/ignore_patterns.go
@@ -146,7 +146,6 @@ var defaultIgnorePatterns = []string{
 	"/discover\\?((.*&)?filtertype(_\\d+)?=){2}",
 	"/search-filter\\?(.*&)?filtertype(_\\d+)?=",
 	"/simple-search\\?((.*&)?(filter_type(_\\d+)?|filtertype)=){2}",
-	"/simple-search\\?(?=(.*&)?(filter_type(_\\d+)?|filtertype)=)(.*&)?(author|subject|dateIssued)_page=",
 	"[?&]dateIssued_page=\\d{2,}(&|$)",
 	"[?&]starts_with=",
 
-- 
GitLab