From df2c003e6c7375cc2a30e9c05d68e74d2b604229 Mon Sep 17 00:00:00 2001 From: ale <ale@incal.net> Date: Sun, 20 Nov 2011 14:53:08 +0000 Subject: [PATCH] query fingerprints in a more efficient way --- server/djrandom/fingerprint/dedup.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/server/djrandom/fingerprint/dedup.py b/server/djrandom/fingerprint/dedup.py index daf8b89..585a16a 100644 --- a/server/djrandom/fingerprint/dedup.py +++ b/server/djrandom/fingerprint/dedup.py @@ -64,15 +64,16 @@ class DeDuper(object): log.debug('loading in-memory fingerprint database...') start = time.time() fp.erase_database(local=True, really_delete=True) - mp3s = MP3.query.filter( - (MP3.artist == u'bonobo') - & (MP3.state == MP3.READY) - & (MP3.has_fingerprint == True)) - for mp3 in mp3s: - code = self._generate_code_json(mp3.echoprint_fp, mp3.sha1) + # Skip the ORM and directly query the SQL layer. + q = select([Fingerprint.sha1, Fingerprint.echoprint_fp], + (MP3.sha1 == Fingerprint.sha1) + & (MP3.state == MP3.READY) + & (MP3.has_fingerprint == True)) + for row in engine.execute(q): + code = self._generate_code_json(row.echoprint_fp, row.sha1) if not code: continue - self.codes[mp3.sha1] = code['fp'] + self.codes[row.sha1] = code['fp'] fp.ingest(code, do_commit=False, local=True) elapsed = time.time() - start log.debug('loaded in-memory fingerprint database in %g seconds' % elapsed) -- GitLab