From 6c5901a2eca432fbc5657d10aa8d121036158d4c Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 29 Sep 2011 00:22:53 +0200
Subject: [PATCH] improve sanitization

---
 server/djrandom/scanner/metadata.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/server/djrandom/scanner/metadata.py b/server/djrandom/scanner/metadata.py
index 9c17590..2ef6262 100644
--- a/server/djrandom/scanner/metadata.py
+++ b/server/djrandom/scanner/metadata.py
@@ -6,9 +6,14 @@ import re
 # Compiling and including 'unicodedata' apparently makes it possible
 # to correctly interpret accented characters in unicode tag metadata.
 _nonalpha_pattern = re.compile(r'\W+', re.UNICODE)
+_spaces_pattern = re.compile(r'\s+', re.UNICODE)
 
 def normalize_string(s):
-    return _nonalpha_pattern.sub(' ', s).lower()
+    s = s.replace('_', ' ')
+    s = _nonalpha_pattern.sub(' ', s)
+    s = _spaces_pattern.sub(' ', s)
+    s = s.lower().strip()
+    return s
 
 
 def analyze_mp3(path):
-- 
GitLab