# Reconstructed from a patch hunk against
# server/djrandom/fingerprint/dedup.py.  Both functions below are methods
# of ``class DeDuper(object)``.
#
# Context preceding the hunk (tail of a method that is not fully visible):
#     # Actually de-duplicate the songs we've found.
#     self._resolve_dupes([x[0] for x in dupes])


def _get_song_score(self, mp3):
    """Compute a quality score for a song, used to rank duplicates.

    Args:
      mp3: object with a ``path`` attribute pointing at the MP3 file.

    Returns:
      A ``(bitrate, duration, num_meta)`` tuple.  Tuples compare
      lexicographically, so a higher bitrate wins first, then a longer
      (30-second-rounded) duration, then richer metadata.
    """
    af = eyeD3.Mp3AudioFile(mp3.path)

    # Encoding parameters.  The duration is rounded down to a multiple of
    # 30 seconds so that small length differences do not decide the winner.
    # '//' keeps this an integer division on every Python version.
    bitrate = af.getBitRate()[1]
    duration = 30 * (int(af.getPlayTime()) // 30)

    tag = af.getTag()
    if tag is None:
        # NOTE(review): assumes getTag() returns None for files without a
        # tag -- confirm against the eyeD3 version in use.  Such a file
        # simply gets no metadata credit.
        return (bitrate, duration, 0)

    # Weighted metadata count: embedded images are worth 4, the core
    # identifying tags 2 each, the secondary tags 1 each.
    has_images = bool(tag.getImages())
    core_tags = (tag.getAlbum(), tag.getArtist(), tag.getTitle())
    secondary_flags = (
        bool(tag.getGenre()),
        bool(tag.getYear()),
        tag.getTrackNum()[0] is not None,
    )
    num_meta = (4 * int(has_images)
                + 2 * sum(int(bool(value)) for value in core_tags)
                + sum(int(flag) for flag in secondary_flags))

    return (bitrate, duration, num_meta)


def _resolve_dupes(self, hashes):
    """Select the best song among duplicates and report the others.

    Every song whose SHA1 is in ``hashes`` is scored with
    ``_get_song_score``; the highest-scoring one is kept.  As written, this
    method only logs the hashes of the songs to remove; it does not delete
    anything itself.

    Args:
      hashes: list of SHA1 strings identifying songs that are duplicates
        of each other.
    """
    log.debug('remove_dupes(%s)' % ','.join(hashes))
    if not hashes:
        return

    # Use the column's in_() operator: the Python 'in' operator would be
    # evaluated locally as list membership instead of producing SQL 'IN'.
    mp3s = MP3.query.filter(MP3.sha1.in_(hashes))
    scores = [(self._get_song_score(mp3), mp3.sha1) for mp3 in mp3s]
    if not scores:
        # None of the hashes matched a known song: nothing to choose from.
        log.debug('remove_dupes: no matching songs found')
        return

    # Score tuples order lexicographically (bitrate, duration, metadata),
    # so no custom comparison function is needed.  max() returns the first
    # of several equally-scored entries.
    best_song = max(scores, key=lambda entry: entry[0])[1]
    log.debug('remove_dupes: best song is %s' % best_song)

    # Everything except the winner is a candidate for removal.
    songs_to_remove = [x for x in hashes if x != best_song]
    log.info('remove_dupes: songs to remove: %s' % (','.join(songs_to_remove)))


# Context following the hunk (definition not visible beyond its header):
#     def run_deduper(db_url):