Skip to content
Snippets Groups Projects
Commit 6c5901a2 authored by ale
Browse files

improve sanitization

parent d4de1c85
No related branches found
No related tags found
No related merge requests found
......@@ -6,9 +6,14 @@ import re
# Compiling and including 'unicodedata' apparently makes it possible
# to correctly interpret accented characters in unicode tag metadata.
# Runs of non-word characters (punctuation, symbols, whitespace).
_nonalpha_pattern = re.compile(r'\W+', re.UNICODE)
# Runs of whitespace characters.
_spaces_pattern = re.compile(r'\s+', re.UNICODE)


def normalize_string(s):
    """Return a lowercase, single-space-separated version of *s*.

    The string is sanitized in these steps:

    1. Underscores become spaces. ``\\W`` treats ``_`` as a word
       character, so the non-word pattern alone would leave them in.
    2. Every run of non-word characters is replaced by one space.
    3. Every run of whitespace is collapsed to one space.
    4. The result is lowercased and leading/trailing whitespace removed.

    Args:
        s: The text to normalize.

    Returns:
        The normalized string; empty if *s* contains no word characters.
    """
    s = s.replace('_', ' ')
    s = _nonalpha_pattern.sub(' ', s)
    # Kept as a final safeguard so the output never has repeated spaces.
    s = _spaces_pattern.sub(' ', s)
    return s.lower().strip()
def analyze_mp3(path):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment