Skip to content
Snippets Groups Projects
Select Git revision
  • 655b0a9fdece88c75dbeab6dd24873477331a311
  • master default
  • renovate/golang.org-x-crypto-0.x
  • customize-ui
  • fix-login-rebase
  • master-old
  • redirect-307
7 results

lex.go

Blame
  • dedup.py 2.22 KiB
    import fp
    import os
    import optparse
    import logging
    import json
    from djrandom import daemonize
    from djrandom import utils
    from djrandom.model.mp3 import MP3
    from djrandom.database import Session, init_db
    
    
    # Taken from 'fastingest.py', with minor changes.
    def generate_code_json(jdata, track_id):
        """Build a fingerprint record from a JSON-encoded analysis blob.

        ``jdata`` is parsed with ``json.loads``.  When the parsed object has
        no "code" key, an empty dict is returned.  Otherwise the returned
        dict contains:

        * "track_id": the ``track_id`` argument, unchanged;
        * "fp": the "code" value passed through ``fp.decode_code_string``;
        * "length": the metadata "duration" value;
        * "codever": the metadata "version" formatted with two decimals;
        * "artist", "release", "track": copied from the metadata "artist",
          "release" and "title" entries, each only when its value is truthy.

        A ``KeyError`` propagates if "metadata", "duration" or "version" is
        missing while "code" is present.
        """
        parsed = json.loads(jdata)
        if "code" not in parsed:
            return {}

        code_string = parsed["code"]
        metadata = parsed["metadata"]
        duration = metadata["duration"]
        codever = metadata["version"]

        record = {
            "track_id": track_id,
            "fp": fp.decode_code_string(code_string),
            "length": duration,
            "codever": "%.2f" % codever,
        }

        # Optional tags, as (output key, metadata key) pairs.  Note that the
        # metadata "title" is stored under "track" in the record.
        optional_tags = (
            ("artist", "artist"),
            ("release", "release"),
            ("track", "title"),
        )
        for out_key, meta_key in optional_tags:
            value = metadata.get(meta_key)
            if value:
                record[out_key] = value
        return record
    
    
    def dedupe_db():
        """Print likely duplicate tracks based on their stored fingerprints.

        Every MP3 row matching the filter below (ready, no error, and a
        non-null ``echoprint_fp``) is converted with ``generate_code_json``
        and ingested through ``fp.ingest(..., local=True)``.  Each decoded
        fingerprint is then queried back with ``fp.query_fp``.  For every
        track whose query yields at least two results, its SHA1 is printed,
        followed by one line per *other* matching track with its score.

        Rows for which ``generate_code_json`` returns an empty dict are
        skipped.  Nothing is returned; the report goes to standard output.
        """
        codes = {}

        # Load all known fingerprints into the db.
        # NOTE(review): the ``== True`` / ``== False`` / ``!= None``
        # comparisons build the expression handed to ``MP3.query.filter``
        # (presumably SQLAlchemy column operators), so they are kept as-is
        # rather than rewritten as ``is`` tests.
        mp3s = MP3.query.filter(
            (MP3.ready == True) & (MP3.error == False)
            & (MP3.echoprint_fp != None))
        for mp3 in mp3s:
            code = generate_code_json(mp3.echoprint_fp, mp3.sha1)
            if not code:
                continue
            codes[mp3.sha1] = code['fp']
            fp.ingest([code], do_commit=False, local=True)

        # Now dedupe by going through all our codes over again.
        # ``items()`` and single-argument ``print(...)`` behave the same on
        # Python 2 and also parse on Python 3.
        for sha1, code in codes.items():
            results = fp.query_fp(code, local=True)
            if len(results) < 2:
                continue
            print('SHA1: %s' % sha1)
            for track_id, score in results:
                # Skip the track's own entry in its result list.
                if track_id == sha1:
                    continue
                # Both values must be passed as one tuple: ``%`` binds
                # tighter than the comma, so ``fmt % track_id, score`` would
                # format with ``track_id`` alone and raise TypeError.
                print('  --> %s (%f)' % (track_id, score))
    
    
    def run_deduper(db_url):
        """Call ``init_db`` with ``db_url`` and then run ``dedupe_db``.

        ``main`` hands this function to ``daemonize.daemonize`` together
        with the ``--db_url`` option value as its single argument.
        """
        init_db(db_url)
        dedupe_db()
    
    
    def main():
        """Parse command-line options and start the deduper.

        Registers ``--db_url`` plus the options added by
        ``daemonize.add_standard_options``, then lets
        ``utils.read_config_defaults`` process the file named by the
        ``DJRANDOM_CONF`` environment variable (falling back to
        ``/etc/djrandom.conf``).  A missing ``--db_url`` or any positional
        argument is reported through the parser's ``error`` method.
        Finally ``run_deduper`` is handed to ``daemonize.daemonize`` with
        the database URL as its only argument.
        """
        option_parser = optparse.OptionParser()
        option_parser.add_option('--db_url')
        daemonize.add_standard_options(option_parser)

        config_path = os.getenv('DJRANDOM_CONF', '/etc/djrandom.conf')
        utils.read_config_defaults(option_parser, config_path)

        options, extra_args = option_parser.parse_args()
        if not options.db_url:
            option_parser.error('Must provide --db_url')
        if extra_args:
            option_parser.error('Too many arguments')

        run_args = (options.db_url,)
        daemonize.daemonize(options, run_deduper, run_args)
    
    
    # Run main() only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()