From 7d187c96ac318d5e0425576069f2a94138e17b69 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Sun, 22 Jan 2012 09:08:30 +0000
Subject: [PATCH] add a standalone server to run the timbre vector search db

---
 server/djrandom/mood/mood_db.py | 115 ++++++++++++++++++++++++++++++++
 server/setup.py                 |   1 +
 2 files changed, 116 insertions(+)
 create mode 100644 server/djrandom/mood/mood_db.py

diff --git a/server/djrandom/mood/mood_db.py b/server/djrandom/mood/mood_db.py
new file mode 100644
index 0000000..efa1031
--- /dev/null
+++ b/server/djrandom/mood/mood_db.py
@@ -0,0 +1,115 @@
+import heapq
+import logging
+import marsyas
+import optparse
+import os
+import threading
+import time
+from djrandom import daemonize
+from djrandom import utils
+from djrandom.mood import marsyas_utils
+from djrandom.mood import marsyas_c_utils
+from djrandom.model.mp3 import MP3, Features
+from djrandom.database import Session, init_db
+from flask import Flask, request, abort, jsonify
+from sqlalchemy import select
+
+log = logging.getLogger(__name__)
+app = Flask(__name__)
+
+
+class TimbreDb(object):
+
+    def __init__(self):
+        self._db = []
+
+    def load_data(self, dataiter):
+        self._db = list(dataiter)
+        log.debug('timbre data loaded')
+
+    def search(self, rv, n=10):
+        scores = [(0, None)] * n
+        for id, vector in self._db:
+            score = marsyas_c_utils.euclidean_distance(vector, rv)
+            heapq.heappushpop(scores, (score, id))
+        return scores
+
+
+class DbLoader(threading.Thread):
+
+    def __init__(self, engine, timbre_db):
+        threading.Thread.__init__(self)
+        self._engine = engine
+        self._timbre_db = timbre_db
+
+    def _updatedb(self):
+        # Bypass the SQLAlchemy ORM, and just run a huge SELECT query
+        # to reduce the memory footprint.
+        q = select([Features.sha1, Features.timbre_vector],
+                   (MP3.sha1 == Features.sha1)
+                   & (MP3.state == MP3.READY)
+                   & (MP3.has_features == True))
+        features_iter = (
+            (x.sha1, marsyas_utils.deserialize_realvec(x.timbre_vector))
+            for x in self._engine.execute(q))
+        self._timbre_db.load_data(features_iter)
+
+    def run(self):
+        while True:
+            try:
+                self._updatedb()
+            except Exception, e:
+                log.error('error updating the features db: %s', e)
+            time.sleep(3600)
+
+
+@app.teardown_request
+def shutdown_dbsession(exception=None):
+    Session.remove()
+
+
+@app.route('/search/<sha1>')
+def search_handler(sha1):
+    n = int(request.args.get('n', 10))
+    mp3 = MP3.query.get(sha1)
+    if not mp3 or not mp3.has_features:
+        abort(404)
+
+    vector = marsyas_utils.deserialize_realvec(mp3.features.timbre_vector)
+    return jsonify(results=[
+            {'score': x[0], 'sha1': x[1]}
+            for x in app.timbre_db.search(vector, n)])
+
+
+def run_timbre_db(db_url, port):
+    engine = init_db(db_url)
+
+    timbre_db = TimbreDb()
+
+    loader = DbLoader(engine, timbre_db)
+    loader.setDaemon(True)
+    loader.start()
+
+    app.timbre_db = timbre_db
+    app.run(port)
+
+
+def main():
+    parser = optparse.OptionParser()
+    parser.add_option('--db_url')
+    parser.add_option('--port', type='int', default=4001)
+    daemonize.add_standard_options(parser)
+    utils.read_config_defaults(
+        parser, os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf'))
+    opts, args = parser.parse_args()
+    if not opts.db_url:
+        parser.error('Must provide --db_url')
+    if args:
+        parser.error('Too many arguments')
+
+    daemonize.daemonize(opts, run_timbre_db,
+                        (opts.db_url, opts.port))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/server/setup.py b/server/setup.py
index f842c30..4f06ca9 100644
--- a/server/setup.py
+++ b/server/setup.py
@@ -33,6 +33,7 @@ setup(
       "djrandom-metadata-fixer = djrandom.metadata_fixer.metadata_fixer:main",
       "djrandom-solr-fixer = djrandom.model.verify:main",
       "djrandom-mood-scanner = djrandom.mood.mood_scanner:main",
+      "djrandom-mood-db = djrandom.mood.mood_db:main",
     ],
   },
   )
-- 
GitLab