Skip to content
Snippets Groups Projects
Commit a4bd430e authored by ale's avatar ale
Browse files

add a new state for mp3 files which have missing metadata tags: BAD_METADATA;...

add a new state for mp3 files which have missing metadata tags: BAD_METADATA; also add a script that attempts to retrieve the missing information from EchoNest public servers
parent 5b7ecd36
Branches
No related tags found
No related merge requests found
import json
import logging
import optparse
import os
import socket
import time
import traceback
import urllib2

from djrandom import daemonize
from djrandom import utils
from djrandom.database import Session, init_db
from djrandom.model.mp3 import MP3
from djrandom.scanner import metadata
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class NoMetadataError(Exception):
    """Raised when the remote lookup cannot provide metadata for a song."""
class MetadataFixer(object):
    """Fill in missing title/artist tags using the EchoNest identify API.

    Files are taken from ``MP3.get_with_bad_metadata()``, looked up by
    fingerprint, and moved to the ``READY`` state on success or to
    ``ERROR`` on any failure.
    """

    ECHONEST_API_URL = 'http://developer.echonest.com/api/v4/song/identify'

    # Seconds to wait before retrying after a transient (network or 5xx)
    # failure, so that a broken upstream is not hammered in a tight loop.
    RETRY_DELAY = 10

    def __init__(self, echonest_api_key):
        """Store the EchoNest API key used for every request."""
        self.api_key = echonest_api_key

    def identify_song(self, mp3):
        """Look up a song on EchoNest by its fingerprint.

        Transient failures (network errors, non-4xx HTTP errors) are
        logged and retried indefinitely, with a pause between attempts.

        Args:
          mp3: object providing ``get_fingerprint()`` and ``sha1``.

        Returns:
          The first entry of the ``songs`` list in the API response.

        Raises:
          NoMetadataError: on a 4xx HTTP error, a non-zero API status
            code, or an empty ``songs`` list.
        """
        # NOTE(review): the fingerprint is assumed to be the serialized
        # JSON payload the identify endpoint expects as the request body;
        # confirm against MP3.get_fingerprint().
        json_fp = mp3.get_fingerprint()
        url = '%s?api_key=%s' % (self.ECHONEST_API_URL, self.api_key)
        headers = {'Content-Type': 'application/octet-stream'}
        while True:
            # Request(url, data, headers): the fingerprint is the POST body.
            req = urllib2.Request(url, json_fp, headers)
            try:
                result = json.loads(urllib2.urlopen(req).read())
            except urllib2.HTTPError as e:
                # HTTPErrors are fatal only in case of 4xx codes.
                if 400 <= e.code < 500:
                    raise NoMetadataError('HTTP Error %d' % e.code)
                log.error('API HTTP error: %s', e)
            except (urllib2.URLError, socket.error) as e:
                log.error('API HTTP error: %s', e)
            else:
                response = result['response']
                status = response['status']
                if status['code'] != 0:
                    log.error('EchoNest API replied with code %d: %s',
                              status['code'], status['message'])
                    raise NoMetadataError('API Error')
                if not response['songs']:
                    log.info('no information found for %s', mp3.sha1)
                    raise NoMetadataError('Not found')
                return response['songs'][0]
            time.sleep(self.RETRY_DELAY)

    def process(self, mp3):
        """Set ``mp3.title`` and ``mp3.artist`` from the EchoNest result.

        Raises:
          NoMetadataError: if the song could not be identified.
        """
        info = self.identify_song(mp3)
        mp3.title = metadata.normalize_string(info['title'])
        mp3.artist = metadata.normalize_string(info['artist_name'])

    def scan(self, run_once):
        """Process files with bad metadata, one at a time.

        Args:
          run_once: if true, return as soon as no matching file is left;
            otherwise sleep for ten minutes and poll the database again.
        """
        while True:
            mp3 = MP3.get_with_bad_metadata().limit(1).first()
            if not mp3:
                if run_once:
                    break
                Session.remove()
                time.sleep(600)
                continue
            log.info('searching metadata for %s', mp3.sha1)
            try:
                self.process(mp3)
                mp3.state = MP3.READY
            except Exception:
                # Any failure takes the file out of the BAD_METADATA
                # state, so it is not selected again on the next pass.
                log.error(traceback.format_exc())
                mp3.state = MP3.ERROR
            Session.add(mp3)
            Session.commit()
def run_fixer(echonest_api_key, db_url, run_once):
    """Daemon body: set up the database and run the metadata fixer.

    A 300-second default socket timeout is installed before any work
    is done.
    """
    socket.setdefaulttimeout(300)
    init_db(db_url)
    MetadataFixer(echonest_api_key).scan(run_once)
def main():
    """Parse the command line and start the metadata fixer.

    Option defaults are read from the file named by the DJRANDOM_CONF
    environment variable (``/etc/djrandom.conf`` if unset). Both
    ``--db_url`` and ``--echonest_api_key`` are required and no
    positional arguments are accepted. ``--once`` sets
    ``opts.foreground`` and makes the scan stop once no file is left.
    """
    parser = optparse.OptionParser()
    parser.add_option('--once', action='store_true')
    parser.add_option('--echonest_api_key')
    parser.add_option('--db_url')
    daemonize.add_standard_options(parser)
    utils.read_config_defaults(
        parser, os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf'))
    opts, args = parser.parse_args()
    if not opts.db_url:
        parser.error('Must provide --db_url')
    # The value lives on the parsed options object; a bare
    # `echonest_api_key` name does not exist in this scope.
    if not opts.echonest_api_key:
        parser.error('Must provide --echonest_api_key')
    if args:
        parser.error('Too many arguments')

    if opts.once:
        opts.foreground = True

    daemonize.daemonize(opts, run_fixer,
                        (opts.echonest_api_key, opts.db_url, opts.once))
# Run the fixer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
......@@ -30,6 +30,7 @@ class MP3(Base):
# Single-character state codes stored in the `state` column below.
READY = 'R'
ERROR = 'E'
DUPLICATE = 'D'
# State for mp3 files which have missing metadata tags.
BAD_METADATA = 'M'
# Primary key; presumably the hex SHA-1 digest of the file (40 chars) --
# TODO confirm against the code that computes it.
sha1 = Column(String(40), primary_key=True)
# Current state code; indexed, defaults to INCOMING (defined outside
# this excerpt).
state = Column(String(1), default=INCOMING, index=True)
......@@ -95,7 +96,14 @@ class MP3(Base):
@classmethod
def get_with_no_fingerprint(cls):
    """Return a query for files that still have no fingerprint.

    Matches files in the READY or BAD_METADATA state whose
    ``has_fingerprint`` is 0. BAD_METADATA files are included because
    ``get_with_bad_metadata()`` only returns files that already have a
    fingerprint.
    """
    return cls.query.filter(((cls.state == cls.READY)
                             | (cls.state == cls.BAD_METADATA))
                            & (cls.has_fingerprint == 0))
@classmethod
def get_with_bad_metadata(cls):
    """Return a query for fingerprinted files in the BAD_METADATA state."""
    criteria = {'state': cls.BAD_METADATA, 'has_fingerprint': 1}
    return cls.query.filter_by(**criteria)
@classmethod
def last_uploaded(cls, n=10):
......
......@@ -13,6 +13,10 @@ from djrandom.scanner import indexer
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class BadMetadataError(Exception):
    """Raised when an analyzed file has an empty artist or title tag."""
class Scanner(object):
def __init__(self, solr_url):
......@@ -20,6 +24,8 @@ class Scanner(object):
def process(self, mp3):
    """Analyze the file at ``mp3.path`` and index it.

    Every key returned by the analyzer is copied onto ``mp3`` as an
    attribute before the object is handed to the indexer.

    Raises:
      BadMetadataError: if the artist or the title tag is empty.
    """
    tags = metadata.analyze_mp3(mp3.path)
    if not (tags['artist'] and tags['title']):
        raise BadMetadataError()
    for attr_name in tags:
        setattr(mp3, attr_name, tags[attr_name])
    self.idx.add_mp3(mp3)
......@@ -40,6 +46,9 @@ class Scanner(object):
try:
    self.process(mp3)
    mp3.state = MP3.READY
except BadMetadataError:
    # Missing artist/title: mark the file with the BAD_METADATA state
    # (an attribute of the MP3 class) instead of treating it as an error.
    log.info('bad metadata for %s' % mp3.sha1)
    mp3.state = MP3.BAD_METADATA
except Exception:
    log.error(traceback.format_exc())
    mp3.state = MP3.ERROR
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment