# Compiling and including 'unicodedata' apparently makes it possible
# to correctly interpret accented characters in unicode tag metadata.
_nonalpha_pattern = re.compile(r'\W+', re.UNICODE)

# One or more separator characters.  '_' is listed explicitly because
# \w counts it as a word character, so \W alone would leave it in place.
_separator_pattern = re.compile(r'[\W_]+', re.UNICODE)


def normalize_string(s):
    """Return a lower-cased, space-separated form of ``s``.

    Every run of non-word characters (punctuation, whitespace) and
    underscores is collapsed into a single space; leading and trailing
    separators are removed.

    Args:
        s: the string to normalize.

    Returns:
        The normalized string.  It is empty when ``s`` contains nothing
        but separator characters.
    """
    # A single substitution is enough: all whitespace is matched by \W,
    # so once each separator run has become one space there is nothing
    # left for a separate whitespace-collapsing pass to do.
    return _separator_pattern.sub(' ', s).lower().strip()