From 6c5901a2eca432fbc5657d10aa8d121036158d4c Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 29 Sep 2011 00:22:53 +0200
Subject: [PATCH] improve sanitization

---
 server/djrandom/scanner/metadata.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/server/djrandom/scanner/metadata.py b/server/djrandom/scanner/metadata.py
index 9c17590..2ef6262 100644
--- a/server/djrandom/scanner/metadata.py
+++ b/server/djrandom/scanner/metadata.py
@@ -6,9 +6,14 @@ import re
 # Compiling and including 'unicodedata' apparently makes it possible
 # to correctly interpret accented characters in unicode tag metadata.
 _nonalpha_pattern = re.compile(r'\W+', re.UNICODE)
+_spaces_pattern = re.compile(r'\s+', re.UNICODE)
 
 def normalize_string(s):
-    return _nonalpha_pattern.sub(' ', s).lower()
+    s = s.replace('_', ' ')
+    s = _nonalpha_pattern.sub(' ', s)
+    s = _spaces_pattern.sub(' ', s)
+    s = s.lower().strip()
+    return s
 
 
 def analyze_mp3(path):
-- 
GitLab