diff --git a/server/djrandom/metadata_fixer/metadata_fixer.py b/server/djrandom/metadata_fixer/metadata_fixer.py
new file mode 100644
index 0000000000000000000000000000000000000000..de76d2c223bf88ed269a88a41e9dff262a1c2dde
--- /dev/null
+++ b/server/djrandom/metadata_fixer/metadata_fixer.py
@@ -0,0 +1,144 @@
+"""Daemon that fixes MP3s with bad metadata using the EchoNest API."""
+
+import json
+import os
+import optparse
+import logging
+import socket
+import time
+import traceback
+import urllib2
+from djrandom import daemonize
+from djrandom import utils
+from djrandom.model.mp3 import MP3
+from djrandom.database import Session, init_db
+from djrandom.scanner import metadata
+
+log = logging.getLogger(__name__)
+
+
+class NoMetadataError(Exception):
+    """Raised when the remote API cannot provide metadata for a song."""
+
+
+class MetadataFixer(object):
+    """Fill in title/artist for songs flagged as having bad metadata."""
+
+    ECHONEST_API_URL = 'http://developer.echonest.com/api/v4/song/identify'
+
+    # Seconds to wait before retrying after a transient API failure.
+    RETRY_DELAY = 10
+
+    def __init__(self, echonest_api_key):
+        self.api_key = echonest_api_key
+
+    def identify_song(self, mp3):
+        """Identify a song by POSTing its fingerprint to EchoNest.
+
+        Network errors and non-4xx HTTP errors are retried indefinitely,
+        pausing RETRY_DELAY seconds between attempts.
+
+        Returns the first entry of the 'songs' list in the API response.
+        Raises NoMetadataError on a 4xx HTTP error, on a non-zero API
+        status code, or when the API returns no songs.
+        """
+        json_fp = mp3.get_fingerprint()
+        url = '%s?api_key=%s' % (self.ECHONEST_API_URL, self.api_key)
+        while True:
+            # POST the fingerprint (assumed to be a JSON string -- confirm).
+            req = urllib2.Request(
+                url, data=json_fp,
+                headers={'Content-Type': 'application/octet-stream'})
+
+            try:
+                result = json.loads(urllib2.urlopen(req).read())
+                response = result['response']
+                if response['status']['code'] != 0:
+                    log.error('EchoNest API replied with code %d: %s' % (
+                            response['status']['code'],
+                            response['status']['message']))
+                    raise NoMetadataError('API Error')
+                if not response['songs']:
+                    log.info('no information found for %s' % mp3.sha1)
+                    raise NoMetadataError('Not found')
+                return response['songs'][0]
+
+            except urllib2.HTTPError, e:
+                # HTTPErrors are fatal only in case of 4xx codes.
+                if e.code >= 400 and e.code < 500:
+                    raise NoMetadataError('HTTP Error %d' % e.code)
+                log.error('API HTTP error %d, retrying' % e.code)
+
+            except (urllib2.URLError, socket.error), e:
+                log.error('API HTTP error: %s' % str(e))
+
+            # Avoid hammering the API in a tight loop while it is failing.
+            time.sleep(self.RETRY_DELAY)
+
+    def process(self, mp3):
+        """Set mp3.title and mp3.artist from the identified song."""
+        info = self.identify_song(mp3)
+        mp3.title = metadata.normalize_string(info['title'])
+        mp3.artist = metadata.normalize_string(info['artist_name'])
+
+    def scan(self, run_once):
+        """Scan the database for files with bad metadata and fix them.
+
+        If run_once is true, return once no more files are found;
+        otherwise sleep for ten minutes and poll again.
+        """
+        while True:
+            mp3 = MP3.get_with_bad_metadata().limit(1).first()
+            if not mp3:
+                if run_once:
+                    break
+                Session.remove()
+                time.sleep(600)
+                continue
+            log.info('searching metadata for %s' % mp3.sha1)
+            try:
+                self.process(mp3)
+                mp3.state = MP3.READY
+            except Exception, e:
+                log.error(traceback.format_exc())
+                mp3.state = MP3.ERROR
+            Session.add(mp3)
+            Session.commit()
+
+
+def run_fixer(echonest_api_key, db_url, run_once):
+    """Initialize the database and run the metadata fixer loop."""
+    socket.setdefaulttimeout(300)
+
+    init_db(db_url)
+    fixer = MetadataFixer(echonest_api_key)
+    fixer.scan(run_once)
+
+
+def main():
+    """Parse command-line options and start the fixer daemon."""
+    parser = optparse.OptionParser()
+    parser.add_option('--once', action='store_true')
+    parser.add_option('--echonest_api_key')
+    parser.add_option('--db_url')
+    daemonize.add_standard_options(parser)
+    utils.read_config_defaults(
+        parser, os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf'))
+    opts, args = parser.parse_args()
+    if not opts.db_url:
+        parser.error('Must provide --db_url')
+    if not opts.echonest_api_key:
+        parser.error('Must provide --echonest_api_key')
+    if args:
+        parser.error('Too many arguments')
+
+    if opts.once:
+        opts.foreground = True
+
+    daemonize.daemonize(opts, run_fixer,
+                        (opts.echonest_api_key, opts.db_url, opts.once))
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/server/djrandom/model/mp3.py b/server/djrandom/model/mp3.py
index ceef2d3de180cd456e361c75c2358d218e0d98ed..3f70280c30549a664682ad58eb00f017b1d78c26 100644
--- a/server/djrandom/model/mp3.py
+++ b/server/djrandom/model/mp3.py
@@ -30,6 +30,7 @@ class MP3(Base):
     READY = 'R'
     ERROR = 'E'
     DUPLICATE = 'D'
+    BAD_METADATA = 'M'
 
     sha1 = Column(String(40), primary_key=True)
     state = Column(String(1), default=INCOMING, index=True)
@@ -95,7 +96,15 @@ class MP3(Base):
 
     @classmethod
     def get_with_no_fingerprint(cls):
-        return cls.query.filter_by(state=cls.READY, has_fingerprint=0)
+        return cls.query.filter(((cls.state == cls.READY)
+                                 | (cls.state == cls.BAD_METADATA))
+                                & (cls.has_fingerprint == 0))
+
+    @classmethod
+    def get_with_bad_metadata(cls):
+        """Query fingerprinted songs in the BAD_METADATA state."""
+        return cls.query.filter_by(state=cls.BAD_METADATA,
+                                   has_fingerprint=1)
 
     @classmethod
     def last_uploaded(cls, n=10):
diff --git a/server/djrandom/scanner/scanner.py b/server/djrandom/scanner/scanner.py
index 9fdfddd927bf37541b4fbdf76908513708ad24c1..b8f992cb926e93d0e0390e2426f11c0ab362d625 100644
--- a/server/djrandom/scanner/scanner.py
+++ b/server/djrandom/scanner/scanner.py
@@ -13,6 +13,10 @@ from djrandom.scanner import indexer
 log = logging.getLogger(__name__)
 
 
+class BadMetadataError(Exception):
+    """Raised when a scanned file has an empty artist or title."""
+
+
 class Scanner(object):
 
     def __init__(self, solr_url):
@@ -20,6 +24,8 @@ class Scanner(object):
 
     def process(self, mp3):
         mp3_info = metadata.analyze_mp3(mp3.path)
+        if not mp3_info['artist'] or not mp3_info['title']:
+            raise BadMetadataError()
         for key, value in mp3_info.iteritems():
             setattr(mp3, key, value)
         self.idx.add_mp3(mp3)
@@ -40,6 +46,9 @@ class Scanner(object):
             try:
                 self.process(mp3)
                 mp3.state = MP3.READY
+            except BadMetadataError:
+                log.info('bad metadata for %s' % mp3.sha1)
+                mp3.state = MP3.BAD_METADATA
             except Exception, e:
                 log.error(traceback.format_exc())
                 mp3.state = MP3.ERROR