diff --git a/server/djrandom/fingerprint/dedup.py b/server/djrandom/fingerprint/dedup.py
index dbd342fe58e0101e9cd24405efbe06ca9dfe894f..2d1ef5762bfd382bb3ca0eae1cbb703ee582466a 100644
--- a/server/djrandom/fingerprint/dedup.py
+++ b/server/djrandom/fingerprint/dedup.py
@@ -3,105 +3,149 @@ import os
 import optparse
 import logging
 import json
+import time
 from djrandom import daemonize
 from djrandom import utils
 from djrandom.model.mp3 import MP3
 from djrandom.database import Session, init_db
 
+log = logging.getLogger(__name__)
 
-# Taken from 'fastingest.py', with minor changes.
-def generate_code_json(jdata, track_id):
-    c = json.loads(jdata)
-    if "code" not in c:
-        return {}
-
-    code = c["code"]
-    m = c["metadata"]
-    length = m["duration"]
-    version = m["version"]
-    artist = m.get("artist", None)
-    title = m.get("title", None)
-    release = m.get("release", None)
-    decoded = fp.decode_code_string(code)
+
+class DeDuper(object):
+
+    def _generate_code_json(self, jsondata, track_id):
+        """Build an fp.ingest() record from echoprint-codegen JSON output.
+
+        Returns {} when the JSON carries no "code" key; otherwise a dict
+        keyed by track_id/fp/length/codever plus any tags that are set.
+        Adapted from 'fastingest.py'.
+        """
+        parsed = json.loads(jsondata)
+        if "code" not in parsed:
+            return {}
+
+        meta = parsed["metadata"]
+        duration, codever = meta["duration"], meta["version"]
+        record = {
+            "track_id": track_id,
+            "fp": fp.decode_code_string(parsed["code"]),
+            "length": duration,
+            "codever": "%.2f" % codever,
+        }
+
+        # Optional tags are copied only when present and non-empty.
+        for src_key, dst_key in (("artist", "artist"), ("release", "release"),
+                                 ("title", "track")):
+            tag = meta.get(src_key)
+            if tag:
+                record[dst_key] = tag
+        return record
+
+    def dedupe(self):
+        self._ingest()  # rebuild the local fingerprint db and self.codes
+        self._scan_for_dupes()  # query each stored code against that db
+
+    def _ingest(self):
+        """Rebuild the local fingerprint db from the stored echoprint codes.
+
+        Erases the local db first; also fills self.codes as {sha1: code_string}.
+        """
+        self.codes = {}
+        log.debug('loading in-memory fingerprint database...')
+        start = time.time()
+        fp.erase_database(local=True, really_delete=True)
+        mp3s = MP3.query.filter(
+            (MP3.artist == u'bonobo')  # NOTE(review): hard-coded artist; debug leftover?
+            & (MP3.ready == True) & (MP3.error == False)
+            & (MP3.echoprint_fp != None))
+        for mp3 in mp3s:
+            code = self._generate_code_json(mp3.echoprint_fp, mp3.sha1)
+            if not code:
+                continue
+            self.codes[mp3.sha1] = code['fp']
+            fp.ingest(code, do_commit=False, local=True)
+        elapsed = time.time() - start
+        log.debug('loaded in-memory fingerprint database in %g seconds', elapsed)
+
+    def _scan_for_dupes(self):
+        """Query every ingested code and de-dupe those with several hits."""
+        log.debug('de-duping fingerprint database...')
+        began = time.time()
+        for track_hash, fingerprint in self.codes.iteritems():
+            matches = fp.query_fp(fingerprint, local=True).results
+            # A lone hit is presumably the song matching itself.
+            if len(matches) >= 2:
+                self._dedupe_song(track_hash, fingerprint, matches)
+        log.debug('de-duped fingerprint database in %g seconds' % (
+            time.time() - began))
+
+    def _dedupe_song(self, sha1, code_string, results):
+        """Score fingerprint matches and de-duplicate the song if needed.
         
-    data = {"track_id": track_id,
-            "fp": decoded,
-            "length": length,
-            "codever": "%.2f" % version
-            }
-    if artist: data["artist"] = artist
-    if release: data["release"] = release
-    if title: data["track"] = title
-    return data
-
-
-def dedupe_db():
-
-    codes = {}
-
-    # Load all known fingerprints into the db.
-    mp3s = MP3.query.filter(
-        (MP3.artist == u'bonobo') 
-        & (MP3.ready == True) & (MP3.error == False) 
-        & (MP3.echoprint_fp != None))
-    for mp3 in mp3s:
-        code = generate_code_json(mp3.echoprint_fp, mp3.sha1)
-        if not code:
-            continue
-        codes[mp3.sha1] = code['fp']
-        fp.ingest([code], do_commit=False, local=True)
-
-    # Now dedupe by going through all our codes over again.
-    for sha1, code in codes.iteritems():
-        results = fp.query_fp(code, local=True).results
-        if len(results) < 2:
-            continue
-        print_scores(sha1, code, results)
-
-
-def print_scores(sha1, code_string, results):
-    elbow = 10
-    code_len = len(code_string.split(' ')) / 2
-    actual_scores = {}
-    original_scores = {}
-    for entry in results:
-        track_id = entry['track_id']
-        track_code = fp.local_fp_code_for_track_id(track_id)
-        actual_scores[track_id] = fp.actual_matches(code_string, track_code, elbow=elbow)
-        original_scores[track_id] = entry['score']
-
-    # Histogram-based score computation. Only keep the highest per-track score.
-    sorted_actual_scores = sorted(actual_scores.iteritems(), key=lambda (k, v): v, reverse=True)
-    new_sorted_actual_scores = []
-    existing_track_ids = set()
-    for trid, score in sorted_actual_scores:
-        track_id = trid.split('-')[0]
-        if track_id not in existing_track_ids:
-            existing_track_ids.add(track_id)
-            new_sorted_actual_scores.append((trid, score))
-
-    orig = MP3.query.get(sha1)
-    print "\nSONG: %s/%s (%s) code_len=%d" % (orig.artist, orig.title, sha1, code_len)
-
-    top_score = new_sorted_actual_scores[0][1]
-    for track_id, score in new_sorted_actual_scores:
-        track_sha1 = track_id.split('-')[0]
-        if track_sha1 == sha1:
-            continue
-        if score < code_len * 0.1:
-            continue
-        if score < top_score / 2:
-            continue
-        #if (top_score - score) < (top_score / 2):
-        #    continue
-        mp3 = MP3.query.get(track_sha1)
-        print '  --> %s (%s orig:%s), %s/%s' % (track_sha1, score, original_scores[track_id], 
-                                                mp3.artist, mp3.title)
+        Returns True if de-duplication was performed, False otherwise.
+        """
+        elbow = 10
+        code_len = len(code_string.split(' ')) / 2
+        actual_scores = {}
+        original_scores = {}
+        for entry in results:
+            track_id = entry['track_id']
+            track_code = fp.local_fp_code_for_track_id(track_id)
+            actual_scores[track_id] = fp.actual_matches(
+                code_string, track_code, elbow=elbow)
+            original_scores[track_id] = entry['score']
+
+        # Histogram-based score computation. Only keep the highest per-track score.
+        sorted_actual_scores = sorted(actual_scores.iteritems(),
+                                      key=lambda (k, v): v, reverse=True)
+        new_sorted_actual_scores = []
+        existing_track_ids = set()
+        for trid, score in sorted_actual_scores:
+            track_id = trid.split('-')[0]
+            if track_id not in existing_track_ids:
+                existing_track_ids.add(track_id)
+                new_sorted_actual_scores.append((trid, score))
+
+        dupes = []
+
+        top_score = new_sorted_actual_scores[0][1]
+        for track_id, score in new_sorted_actual_scores:
+            track_sha1 = track_id.split('-')[0]
+            # Drop weak matches: below 10% of code_len or half the top score.
+            if score < code_len * 0.1:
+                continue
+            if score < top_score / 2:
+                continue
+            dupes.append((track_sha1, score, original_scores[track_id]))
+
+        if len(dupes) < 2:
+            # Only one fingerprint matches. Good!
+            return False
+
+        # Print out some debugging information.
+        orig = MP3.query.get(sha1)
+        log.info("duplicates for '%s/%s' (%s) code_len=%d:",
+                 orig.artist, orig.title, sha1, code_len)
+        for track_sha1, score, original_score in dupes:
+            mp3 = MP3.query.get(track_sha1)
+            log.info('  --> %s (%s orig:%s), %s/%s',
+                     track_sha1, score, original_score, mp3.artist, mp3.title)
+
+        # Actually de-duplicate the songs we've found.
+        self._resolve_dupes([x[0] for x in dupes])
+        return True
+
+    def _resolve_dupes(self, hashes):
+        """Resolve a group of duplicate hashes (currently only logs them)."""
+        log.debug('remove_dupes(%s)', ','.join(hashes))
+
 
 
 def run_deduper(db_url):
     init_db(db_url)
-    dedupe_db()
+    dup = DeDuper()
+    dup.dedupe()  # load every fingerprint, then scan them for duplicates
 
 
 def main():