Skip to content
Snippets Groups Projects
Commit f00e38b8 authored by ale's avatar ale
Browse files

update SOLR on mp3 state changes (with a little refactoring)

parent 0d4a7122
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@ from sqlalchemy import create_engine
from sqlalchemy.interfaces import PoolListener
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from djrandom.model.indexer import Indexer
Session = scoped_session(sessionmaker(autocommit=False,
......@@ -9,23 +10,32 @@ Session = scoped_session(sessionmaker(autocommit=False,
Base = declarative_base()
Base.query = Session.query_property()
indexer = Indexer()
# We are storing paths as binary blobs, without forcing a charset
# encoding. SQLAlchemy needs this class to safely do so.
# encoding. SQLAlchemy/MySQL needs this class to safely do so.
class SetTextFactory(PoolListener):
def connect(self, dbapi_con, con_record):
dbapi_con.text_factory = str
def init_db(uri):
def init_db(uri, solr_url=None):
# Import all ORM modules here, so that 'create_all' can find them.
from djrandom.model import mp3, playlist
if uri.startswith('mysql://'):
engine = create_engine(uri, listeners=[SetTextFactory()],
pool_recycle=1800)
else:
engine = create_engine(uri, pool_recycle=1800)
Session.configure(bind=engine)
Base.metadata.create_all(engine)
# You can omit 'solr_url' if the program uses the db read-only.
if solr_url:
indexer.set_url(solr_url)
return engine
......@@ -91,17 +91,22 @@ class CheapDeDuper(object):
best = self._resolver.resolve_dupes([s.sha1 for s in songs])
print '\n * best: %s\n' % (best, )
def commit(self):
self._resolver.commit()
def run_cheap_deduper(db_url, dry_run):
engine = init_db(db_url)
def run_cheap_deduper(db_url, solr_url, dry_run):
engine = init_db(db_url, solr_url)
dup = CheapDeDuper()
#dup.dedupe(dry_run)
dup.dedupe_fp(engine)
if not dry_run:
dup.commit()
def main():
parser = optparse.OptionParser()
parser.add_option('--db_url')
parser.add_option('--solr_url', default='http://localhost:8080/solr')
parser.add_option('--apply', action='store_true')
daemonize.add_standard_options(parser)
utils.read_config_defaults(
......@@ -113,7 +118,7 @@ def main():
parser.error('Too many arguments')
daemonize.daemonize(opts, run_cheap_deduper,
(opts.db_url, not opts.apply))
(opts.db_url, opts.solr_url, not opts.apply))
if __name__ == '__main__':
......
......@@ -152,8 +152,8 @@ class DeDuper(object):
self._resolver.commit()
def run_deduper(db_url, dry_run):
init_db(db_url)
def run_deduper(db_url, solr_url, dry_run):
init_db(db_url, solr_url)
dup = DeDuper()
dup.dedupe(dry_run)
......@@ -161,6 +161,7 @@ def run_deduper(db_url, dry_run):
def main():
parser = optparse.OptionParser()
parser.add_option('--db_url')
parser.add_option('--solr_url', default='http://localhost:8080/solr')
parser.add_option('--apply', action='store_true')
daemonize.add_standard_options(parser)
utils.read_config_defaults(
......@@ -172,7 +173,7 @@ def main():
parser.error('Too many arguments')
daemonize.daemonize(opts, run_deduper,
(opts.db_url, not opts.apply))
(opts.db_url, opts.solr_url, not opts.apply))
if __name__ == '__main__':
......
import logging
from djrandom.database import Session
from djrandom.database import Session, indexer
from djrandom.model.mp3 import MP3
from djrandom.fingerprint.compare_songs import sort_songs
......@@ -41,5 +41,7 @@ class Resolver(object):
mp3 = MP3.query.get(sha1)
mp3.mark_as_duplicate(duplicate_of)
Session.add(mp3)
indexer.add_mp3(mp3)
Session.commit()
indexer.commit()
......@@ -20,7 +20,7 @@ log = logging.getLogger(__name__)
def run_frontend(port, solr_url, db_url, lastfm_api_key, album_art_dir,
email_sender, markov_data_file, do_profile):
init_db(db_url)
init_db(db_url, solr_url)
svcs['searcher'] = Searcher(solr_url)
svcs['album_images'] = AlbumImageRetriever(lastfm_api_key, album_art_dir)
......
import solr
from collections import defaultdict
from djrandom.model.mp3 import MP3
class Searcher(object):
......
import solr
class Indexer(object):
"""A very simple wrapper for Solr that supports lazy inizialization."""
def __init__(self, solr_url=None):
self._solr = None
self._solr_url = solr_url
def set_url(self, solr_url):
self._solr_url = solr_url
def _get_solr(self):
if not self._solr:
self._solr = solr.Solr(self._solr_url, timeout=30)
return self._solr
def add_mp3(self, mp3):
# Almost equal to mp3.to_dict() but not exactly (SOLR calls 'id'
# what the database calls 'sha1').
if mp3.state == mp3.READY:
self._get_solr().add({
'id': mp3.sha1,
'artist': mp3.artist,
'album': mp3.album,
'title': mp3.title,
'genre': mp3.genre})
else:
self._get_solr().delete(mp3.sha1)
def del_mp3(self, mp3):
self._get_solr().delete(mp3.sha1)
def commit(self):
self._get_solr().commit()
import solr
class Indexer(object):
def __init__(self, solr_url):
self.solr = solr.Solr(solr_url, timeout=30)
def add_mp3(self, mp3):
# _almost_ equal to mp3.to_dict() but not quite (sha1/id mismatch).
doc = {'id': mp3.sha1,
'artist': mp3.artist,
'album': mp3.album,
'title': mp3.title,
'genre': mp3.genre}
self.solr.add(doc)
def commit(self):
self.solr.commit()
......@@ -6,9 +6,8 @@ import traceback
from djrandom import daemonize
from djrandom import utils
from djrandom.model.mp3 import MP3
from djrandom.database import Session, init_db
from djrandom.database import Session, init_db, indexer
from djrandom.scanner import metadata
from djrandom.scanner import indexer
log = logging.getLogger(__name__)
......@@ -19,16 +18,12 @@ class BadMetadataError(Exception):
class Scanner(object):
def __init__(self, solr_url):
self.idx = indexer.Indexer(solr_url)
def process(self, mp3):
mp3_info = metadata.analyze_mp3(mp3.path)
if not mp3_info['artist'] or not mp3_info['title']:
raise BadMetadataError()
for key, value in mp3_info.iteritems():
setattr(mp3, key, value)
self.idx.add_mp3(mp3)
def scan_db(self, run_once):
"""Scan the database for new files."""
......@@ -39,7 +34,7 @@ class Scanner(object):
if run_once:
break
Session.remove()
self.idx.commit()
indexer.commit()
time.sleep(60)
continue
log.info('processing %s' % mp3.sha1)
......@@ -52,12 +47,13 @@ class Scanner(object):
except Exception, e:
log.error(traceback.format_exc())
mp3.state = MP3.ERROR
indexer.add_mp3(mp3)
Session.add(mp3)
Session.commit()
def run_scanner(solr_url, db_url, run_once):
init_db(db_url)
init_db(db_url, solr_url)
scanner = Scanner(solr_url)
scanner.scan_db(run_once)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment