Skip to content
Snippets Groups Projects
Commit 03de39dc authored by ale
Browse files

Exclude a bunch of unicode url-encoded patterns

parent 1050328b
No related branches found
No related tags found
No related merge requests found
Pipeline #44563 passed
......@@ -10,6 +10,7 @@
import glob
import json
import os
import re
import sys
archivebot_ignore_path = sys.argv[1]
......@@ -24,6 +25,8 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
# fancy PCRE constructs. This excludes <10
# patterns from the ignore list.
continue
if re.search(r'(?:%[0-9A-F]{2}){3,}', p):
continue
p = p.replace('{primary_netloc}', '.*')
print('\t%s,' % json.dumps(p))
except Exception as e:
......
This diff is collapsed.
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment