Skip to content
Snippets Groups Projects
Commit 531247c2 authored by ale's avatar ale
Browse files

add preference rules when comparing duplicate songs, favoring 1) bitrate 2)...

add preference rules when comparing duplicate songs, favoring 1) bitrate 2) duration 3) number of metadata tags
parent f16a8d26
No related tags found
No related merge requests found
......@@ -136,22 +136,54 @@ class DeDuper(object):
# Actually de-duplicate the songs we've found.
self._resolve_dupes([x[0] for x in dupes])
def _get_song_score(self, mp3):
    """Compute a quality score used to rank duplicate copies of a song.

    Args:
      mp3: object whose ``path`` attribute names the MP3 file to inspect.

    Returns:
      A ``(bitrate, duration, num_meta)`` tuple. Tuples compare
      lexicographically, so a higher bitrate wins first, then a longer
      (30-second-bucketed) duration, then a richer set of metadata tags.
    """
    af = eyeD3.Mp3AudioFile(mp3.path)

    # Get encoding parameters.
    bitrate = af.getBitRate()[1]
    # Round the play time down to a multiple of 30 seconds (presumably so
    # that copies differing only slightly in length tie on duration).
    # Floor division keeps this an integer bucket regardless of whether
    # true division is in effect.
    duration = 30 * (int(af.getPlayTime()) // 30)

    # Count metadata tags.
    tag = af.getTag()
    if tag is None:
        # NOTE(review): assumes getTag() returns None for files without
        # a tag -- confirm against the eyeD3 version in use. Such a file
        # carries no metadata at all.
        return (bitrate, duration, 0)

    has_images = bool(tag.getImages())
    core_fields = (
        bool(tag.getAlbum()),
        bool(tag.getArtist()),
        bool(tag.getTitle()),
    )
    extra_fields = (
        bool(tag.getGenre()),
        bool(tag.getYear()),
        tag.getTrackNum()[0] is not None,
    )

    # Weights: embedded images count 4, album/artist/title count 2 each,
    # genre/year/track number count 1 each.
    num_meta = 4 * int(has_images) + 2 * sum(core_fields) + sum(extra_fields)
    return (bitrate, duration, num_meta)
def _resolve_dupes(self, hashes):
    """Select the best copy among duplicate songs and log the others.

    Each song matching one of ``hashes`` is scored with
    ``_get_song_score``; the song with the highest score (bitrate first,
    then duration, then amount of metadata) is kept as the best one.

    Note that this method only logs which songs should be removed; it
    does not itself delete anything.

    Args:
      hashes: list of SHA1 strings identifying the duplicate songs.
    """
    log.debug('remove_dupes(%s)' % ','.join(hashes))

    # Compute 'score' for each song. The column-level in_() operator is
    # required here: a plain Python `MP3.sha1 in hashes` is evaluated to
    # a bool before the query is ever built.
    mp3s = MP3.query.filter(MP3.sha1.in_(hashes))
    scores = [(self._get_song_score(mp3), mp3.sha1) for mp3 in mp3s]
    if not scores:
        # Nothing matched, so there is no best song to choose.
        log.info('remove_dupes: no songs found for the given hashes')
        return

    # Score tuples compare lexicographically, which is exactly the
    # bitrate > duration > metadata precedence we want. max() returns
    # the first of equally-scored songs, like a stable descending sort.
    best_song = max(scores, key=lambda entry: entry[0])[1]
    log.debug('remove_dupes: best song is %s' % best_song)

    # Remove all the other songs.
    songs_to_remove = [x for x in hashes if x != best_song]
    log.info('remove_dupes: songs to remove: %s' % (','.join(songs_to_remove)))
def run_deduper(db_url):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment