diff --git a/server/djrandom/fingerprint/dedup.py b/server/djrandom/fingerprint/dedup.py index 585a16a3f4199229e0866d072f129a1b666e9cb5..86ce482509d4c2186b686943be99a7d074bfede6 100644 --- a/server/djrandom/fingerprint/dedup.py +++ b/server/djrandom/fingerprint/dedup.py @@ -7,9 +7,10 @@ import optparse import logging import json import time +from sqlalchemy import select from djrandom import daemonize from djrandom import utils -from djrandom.model.mp3 import MP3 +from djrandom.model.mp3 import MP3, Fingerprint from djrandom.database import Session, init_db from djrandom.fingerprint.resolve_duplicates import Resolver @@ -18,7 +19,8 @@ log = logging.getLogger(__name__) class DeDuper(object): - def __init__(self): + def __init__(self, engine): + self._engine = engine self._resolver = Resolver() def _generate_code_json(self, jsondata, track_id): @@ -69,7 +71,7 @@ class DeDuper(object): (MP3.sha1 == Fingerprint.sha1) & (MP3.state == MP3.READY) & (MP3.has_fingerprint == True)) - for row in engine.execute(q): + for row in self._engine.execute(q): code = self._generate_code_json(row.echoprint_fp, row.sha1) if not code: continue @@ -156,8 +158,8 @@ class DeDuper(object): def run_deduper(db_url, solr_url, dry_run): - init_db(db_url, solr_url) - dup = DeDuper() + engine = init_db(db_url, solr_url) + dup = DeDuper(engine) dup.dedupe(dry_run)