Select Git revision
-
Robert J. Newmark authored
metadata_fixer.py 4.07 KiB
import json
import os
import optparse
import logging
import socket
import time
import traceback
import urllib2
from djrandom import daemonize
from djrandom import utils
from djrandom.model.mp3 import MP3
from djrandom.database import Session, init_db, indexer
from djrandom.scanner import metadata
log = logging.getLogger(__name__)
class NoMetadataError(Exception):
    """Raised when no usable metadata can be obtained for a song."""
class MetadataFixer(object):
    """Repair songs with bad metadata using the EchoNest identify API.

    Songs returned by ``MP3.get_with_bad_metadata()`` are looked up
    remotely from their fingerprint and, when a match is found, updated
    with the artist and title reported by the service.
    """

    ECHONEST_API_URL = 'http://developer.echonest.com/api/v4/song/identify'

    # Seconds to wait before retrying after a non-4xx HTTP error.  The
    # wait doubles after every failed attempt, capped at MAX_RETRY_DELAY.
    RETRY_DELAY = 1
    MAX_RETRY_DELAY = 60

    # Seconds to sleep between two scans when running continuously.
    SCAN_INTERVAL = 600

    def __init__(self, solr_url, echonest_api_key):
        """Create a fixer.

        Args:
          solr_url: not used by this class; kept so the constructor
            signature stays unchanged for existing callers.
          echonest_api_key: API key sent with every identify request.
        """
        self.api_key = echonest_api_key

    def identify_song(self, mp3):
        """Look up a song on EchoNest from its fingerprint.

        The value of ``mp3.get_fingerprint()`` is sent as the request
        body.  HTTP errors outside the 4xx range are retried without
        limit, sleeping between attempts with an increasing delay.

        Args:
          mp3: object providing ``get_fingerprint()`` and ``sha1``.

        Returns:
          The first entry of the ``songs`` list in the API response.

        Raises:
          NoMetadataError: on a 4xx HTTP error, a non-zero API status
            code, or an empty ``songs`` list.
        """
        json_fp = mp3.get_fingerprint()
        url = '%s?api_key=%s' % (self.ECHONEST_API_URL, self.api_key)
        delay = self.RETRY_DELAY
        while True:
            req = urllib2.Request(
                url,
                data=json_fp,
                headers={'Content-Type': 'application/octet-stream'})
            try:
                conn = urllib2.urlopen(req)
                try:
                    body = conn.read()
                finally:
                    # Release the connection even if the read fails.
                    conn.close()
            except urllib2.HTTPError as e:
                # HTTPErrors are fatal only in case of 4xx codes.
                if 400 <= e.code < 500:
                    raise NoMetadataError('HTTP Error %d' % e.code)
                log.debug('retrying...')
                # Back off instead of hammering a server that is
                # already failing.
                time.sleep(delay)
                delay = min(delay * 2, self.MAX_RETRY_DELAY)
                continue

            response = json.loads(body)['response']
            log.debug('response:\n%s', response)
            status = response['status']
            if status['code'] != 0:
                log.error('EchoNest API replied with code %d: %s',
                          status['code'], status['message'])
                raise NoMetadataError('API Error')
            if not response['songs']:
                log.info('no information found for %s', mp3.sha1)
                raise NoMetadataError('Not found')
            return response['songs'][0]

    def process(self, mp3):
        """Overwrite the title and artist of ``mp3`` with identified values.

        Raises:
          NoMetadataError: propagated from ``identify_song``.
        """
        info = self.identify_song(mp3)
        mp3.title = metadata.normalize_string(info['title'])
        mp3.artist = metadata.normalize_string(info['artist_name'])

    def scan(self):
        """Try to fix every song currently flagged with bad metadata.

        A song is set to ``MP3.READY`` when metadata was found and to
        ``MP3.ERROR`` otherwise (including on unexpected exceptions,
        which are logged with their traceback).  Every song is then
        passed to the indexer and added to the database session.
        """
        n_bad = n_ok = n_err = 0
        for mp3 in MP3.get_with_bad_metadata():
            n_bad += 1
            log.info('searching metadata for %s', mp3.sha1)
            try:
                self.process(mp3)
            except NoMetadataError:
                mp3.state = MP3.ERROR
                n_err += 1
            except Exception:
                # A single broken song must not abort the whole scan.
                log.exception('unexpected error while fixing %s', mp3.sha1)
                mp3.state = MP3.ERROR
                n_err += 1
            else:
                mp3.state = MP3.READY
                log.info('found: %s / %s', mp3.artist, mp3.title)
                n_ok += 1
            indexer.add_mp3(mp3)
            Session.add(mp3)
        # NOTE(review): the original indentation was lost; this assumes
        # both commits run once, after the loop -- confirm.
        Session.commit()
        indexer.commit()
        log.debug('total: %d songs, found: %d, errors: %d',
                  n_bad, n_ok, n_err)

    def run(self, run_once):
        """Run ``scan()`` once, or repeatedly with a pause in between.

        Args:
          run_once: when true, return after the first scan; otherwise
            loop forever, discarding the session and sleeping
            ``SCAN_INTERVAL`` seconds after every scan.
        """
        while True:
            self.scan()
            if run_once:
                break
            Session.remove()
            time.sleep(self.SCAN_INTERVAL)
def run_fixer(solr_url, echonest_api_key, db_url, run_once):
    """Initialize the database and run the metadata fixer.

    Args:
      solr_url: Solr URL, passed to ``init_db`` and ``MetadataFixer``.
      echonest_api_key: API key handed to ``MetadataFixer``.
      db_url: database URL passed to ``init_db``.
      run_once: when true, perform a single scan and return.
    """
    # Applies to every socket created afterwards, so a stalled remote
    # request cannot block the fixer forever.
    socket.setdefaulttimeout(300)
    init_db(db_url, solr_url)
    # MetadataFixer.__init__ takes (solr_url, echonest_api_key); passing
    # the API key alone raised TypeError before any scan could start.
    fixer = MetadataFixer(solr_url, echonest_api_key)
    fixer.run(run_once)
def main():
    """Parse the command line and launch the fixer through ``daemonize``.

    Option defaults are read from the file named by the
    ``DJRANDOM_CONF`` environment variable (``/etc/djrandom.conf`` when
    unset).  ``--db_url`` and ``--echonest_api_key`` are mandatory and
    positional arguments are rejected.  ``--once`` also sets the
    ``foreground`` option.
    """
    cli = optparse.OptionParser()
    cli.add_option('--once', action='store_true')
    cli.add_option('--solr_url', default='http://localhost:8080/solr')
    cli.add_option('--echonest_api_key')
    cli.add_option('--db_url')
    daemonize.add_standard_options(cli)

    config_path = os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf')
    utils.read_config_defaults(cli, config_path)

    options, extra_args = cli.parse_args()

    # Checked in order; the first failing check is reported.
    checks = (
        (not options.db_url, 'Must provide --db_url'),
        (not options.echonest_api_key, 'Must provide --echonest_api_key'),
        (bool(extra_args), 'Too many arguments'),
    )
    for failed, message in checks:
        if failed:
            cli.error(message)

    if options.once:
        options.foreground = True

    fixer_args = (
        options.solr_url,
        options.echonest_api_key,
        options.db_url,
        options.once,
    )
    daemonize.daemonize(options, run_fixer, fixer_args)
if __name__ == '__main__':
    # Only start the fixer when executed as a script, not on import.
    main()