diff --git a/server/djrandom/fingerprint/__init__.py b/server/djrandom/fingerprint/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/server/djrandom/fingerprint/fingerprint.py b/server/djrandom/fingerprint/fingerprint.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8d96cf6db5b0a2ae1f8605dd77484e0ab13d432
--- /dev/null
+++ b/server/djrandom/fingerprint/fingerprint.py
@@ -0,0 +1,103 @@
+import os
+import optparse
+import logging
+import subprocess
+import time
+import traceback
+from djrandom import daemonize
+from djrandom import utils
+from djrandom.model.mp3 import MP3
+from djrandom.database import Session, init_db
+
+log = logging.getLogger(__name__)
+
+
+class Fingerprinter(object):
+    """Store the output of an external codegen program on MP3 objects."""
+
+    def __init__(self, codegen_path):
+        # Executable that process() runs for every file.
+        self.codegen_path = codegen_path
+
+    def process(self, mp3):
+        """Run the codegen program on mp3.path and keep what it prints.
+
+        mp3.echoprint_fp is only assigned when the program wrote something
+        to stdout; the caller is responsible for committing the change.
+        """
+        pipe = subprocess.Popen(
+            [self.codegen_path, mp3.path, '10', '30'],
+            close_fds=False,
+            stdout=subprocess.PIPE)
+        fp_json = pipe.communicate()[0]
+        if fp_json:
+            # Remove the square brackets that make fp_json an array.
+            # (Ugly Hack!)
+            mp3.echoprint_fp = fp_json[2:-2]
+
+    def compute_fingerprints(self, run_once):
+        """Compute fingerprints of new files.
+
+        Each iteration handles one MP3 whose echoprint_fp is NULL.  When
+        there is none, return if run_once is true; otherwise sleep for 60
+        seconds and look again.
+        """
+        while True:
+            # '== None' is rendered as IS NULL by SQLAlchemy; column
+            # attributes do not provide a null() method.
+            mp3 = MP3.query.filter(MP3.echoprint_fp == None
+                                   ).limit(1).first()
+            if not mp3:
+                if run_once:
+                    break
+                # Nothing to do right now.  (Fingerprinter has no 'idx'
+                # attribute, so there is no index to commit here.)
+                Session.remove()
+                time.sleep(60)
+                continue
+            log.info('fingerprinting %s' % mp3.sha1)
+            try:
+                self.process(mp3)
+            except Exception:
+                # Log the failure and keep the loop running.
+                log.error(traceback.format_exc())
+            # NOTE(review): when process() fails or yields no output,
+            # echoprint_fp stays NULL and the query above can return the
+            # same row again -- confirm how such files should be skipped.
+            Session.add(mp3)
+            Session.commit()
+
+
+def run_fingerprinter(db_url, codegen_path, run_once):
+    """Initialize the database, then run the fingerprinting loop."""
+    init_db(db_url)
+    scanner = Fingerprinter(codegen_path)
+    scanner.compute_fingerprints(run_once)
+
+
+def main():
+    """Parse the command line and start the fingerprinter."""
+    parser = optparse.OptionParser()
+    parser.add_option('--once', action='store_true')
+    parser.add_option('--codegen_path',
+                      default='/usr/local/bin/echoprint-codegen')
+    parser.add_option('--db_url')
+    daemonize.add_standard_options(parser)
+    utils.read_config_defaults(
+        parser, os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf'))
+    opts, args = parser.parse_args()
+    if not opts.db_url:
+        parser.error('Must provide --db_url')
+    if args:
+        parser.error('Too many arguments')
+
+    # --once forces opts.foreground on.
+    if opts.once:
+        opts.foreground = True
+
+    daemonize.daemonize(opts, run_fingerprinter,
+                        (opts.db_url, opts.codegen_path, opts.once))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/server/djrandom/model/mp3.py b/server/djrandom/model/mp3.py
index f9314351cc7313dd1b1470c98a74443a85e450cf..68657c7686cea86ead20af9d0bcbf72708937b0f 100644
--- a/server/djrandom/model/mp3.py
+++ b/server/djrandom/model/mp3.py
@@ -1,4 +1,5 @@
 import random
+from sqlalchemy.orm import deferred
 from sqlalchemy import *
 from datetime import datetime, timedelta
 from djrandom.database import Base, Session
@@ -23,6 +24,7 @@ class MP3(Base):
     genre = Column(Unicode(64))
     uploaded_at = Column(DateTime())
     play_count = Column(Integer(), default=0)
+    echoprint_fp = deferred(Column(Text()))
 
     def __init__(self, **kw):
         for k, v in kw.items():
diff --git a/server/setup.py b/server/setup.py
index 559cc53491464858a8fbe027d89bba4ecc7435e0..4da049474ed4a573feff3d20dd84c2ae73d84c29 100644
--- a/server/setup.py
+++ b/server/setup.py
@@ -17,6 +17,7 @@ setup(
     "console_scripts": [
       "djrandom-receiver = djrandom.receiver.receiver:main",
       "djrandom-scanner = djrandom.scanner.scanner:main",
+      "djrandom-fingerprinter = djrandom.fingerprint.fingerprint:main",
       "djrandom-streamer = djrandom.stream.stream:main",
       "djrandom-frontend = djrandom.frontend.frontend:main",
       "djrandom-update-markov = djrandom.model.markov:main",