Select Git revision
customize-preview-widgets.min.js
mp3.py 8.99 KiB
import os
import random
import shutil
from sqlalchemy.orm import deferred, relationship
from sqlalchemy import *
from datetime import datetime, timedelta
from djrandom.database import Base, Session
# Staging directory for duplicate files: MP3.mark_as_duplicate() moves the
# file here (named after its SHA1), pending cleanup.
DUPLICATE_DIR = '/var/tmp/djrandom-duplicates'
class Fingerprint(Base):
    """Fingerprint data for one file, stored apart from the MP3 row.

    Rows are keyed by the same SHA1 value that identifies the MP3.
    """

    __tablename__ = 'fingerprints'

    # SHA1 of the file this fingerprint belongs to (matches MP3.sha1).
    sha1 = Column(String(40), primary_key=True)

    # Fingerprint payload, stored as text.
    echoprint_fp = Column(Text())
class Features(Base):
    """Audio features for one file, stored apart from the MP3 row.

    Rows are keyed by the same SHA1 value that identifies the MP3.
    """

    __tablename__ = 'features'

    # SHA1 of the file these features belong to (matches MP3.sha1).
    sha1 = Column(String(40), primary_key=True)

    # Binary blob holding the timbre vector.
    # NOTE(review): the serialization format is not visible here -- confirm
    # against the code that writes it.
    timbre_vector = Column(LargeBinary())
class MP3(Base):
    """A single MP3.

    Files are identified by their SHA1 hash for de-duping purposes.
    """

    __tablename__ = 'mp3'
    __table_args__ = {'mysql_charset': 'utf8'}

    # Values stored in the single-character `state` column.
    INCOMING = 'I'
    READY = 'R'
    ERROR = 'E'
    DUPLICATE = 'D'
    BAD_METADATA = 'M'

    sha1 = Column(String(40), primary_key=True)
    state = Column(String(1), default=INCOMING, index=True)
    path = Column(String(1024))
    size = Column(Integer())
    artist = Column(Unicode(256), index=True)
    title = Column(Unicode(256), index=True)
    album = Column(Unicode(256), index=True)
    track_num = Column(Integer())
    genre = Column(Unicode(64))
    uploaded_at = Column(DateTime())
    play_count = Column(Integer(), default=0)
    # SHA1 of the MP3 this one duplicates (set by mark_as_duplicate()).
    duplicate_of = Column(String(40))

    # The fingerprint lives in its own table, joined on the SHA1; the
    # boolean flag allows querying for its presence without a join.
    has_fingerprint = Column(Boolean, default=False)
    echoprint_fp = relationship(Fingerprint,
                                primaryjoin=sha1 == Fingerprint.sha1,
                                foreign_keys=Fingerprint.sha1,
                                uselist=False)

    # Same layout for the audio features.
    has_features = Column(Boolean, default=False)
    features = relationship(Features,
                            primaryjoin=sha1 == Features.sha1,
                            foreign_keys=Features.sha1,
                            uselist=False)

    def __init__(self, **kw):
        """Set every keyword argument as an attribute on the new object."""
        for k, v in kw.items():
            setattr(self, k, v)

    def to_dict(self):
        """Return the song metadata as a plain dictionary.

        'uploaded_at' is an ISO-8601 string, or None when the upload time
        is not set. 'duplicate_of' and 'track_num' are only included when
        they hold a true value.
        """
        uploaded_at = None
        if self.uploaded_at is not None:
            uploaded_at = self.uploaded_at.isoformat()
        data = {'title': self.title,
                'artist': self.artist,
                'album': self.album,
                'genre': self.genre,
                'sha1': self.sha1,
                'size': self.size,
                'uploaded_at': uploaded_at}
        if self.duplicate_of:
            data['duplicate_of'] = self.duplicate_of
        if self.track_num:
            data['track_num'] = self.track_num
        return data

    @classmethod
    def get_many(cls, hashes):
        """Return the MP3 objects for `hashes`, in the order of `hashes`.

        Hashes that match no row are silently left out of the result.
        """
        # Materialize once: the input is iterated twice below, which would
        # exhaust a generator.
        hashes = list(hashes)
        if not hashes:
            return []
        order = dict((sha1, idx) for idx, sha1 in enumerate(hashes))
        mp3s = cls.query.filter(cls.sha1.in_(hashes))
        return sorted(mp3s, key=lambda x: order[x.sha1])

    def mark_as_duplicate(self, duplicate_of):
        """Flag this MP3 as a duplicate of the song with SHA1 `duplicate_of`.

        The file is also moved to DUPLICATE_DIR (named after its SHA1).
        The move is best-effort: a failure is logged and otherwise ignored,
        the database state is updated regardless.
        """
        import logging  # local import: not imported at module level

        self.state = self.DUPLICATE
        self.duplicate_of = duplicate_of
        try:
            if not os.path.isdir(DUPLICATE_DIR):
                os.makedirs(DUPLICATE_DIR)
            shutil.move(self.path,
                        os.path.join(DUPLICATE_DIR, self.sha1))
        except Exception:
            # Not a bare `except:` so that KeyboardInterrupt / SystemExit
            # still propagate.
            logging.getLogger(__name__).warning(
                'could not move duplicate %s to %s',
                self.sha1, DUPLICATE_DIR, exc_info=True)

    @classmethod
    def deduplicate(cls, hashes):
        """Resolve each hash to the SHA1 of its non-duplicate original.

        Returns a list as long as `hashes`. An entry is None when the
        input hash is empty, when no matching song exists, or when the
        `duplicate_of` chain is broken or loops back on itself.
        """
        result = []
        for sha1 in hashes:
            resolved = None
            if sha1:
                mp3 = cls.query.get(sha1)
                seen = set()
                while mp3 is not None and mp3.state == cls.DUPLICATE:
                    # A chain that revisits a song would never terminate.
                    if mp3.sha1 in seen or not mp3.duplicate_of:
                        mp3 = None
                        break
                    seen.add(mp3.sha1)
                    mp3 = cls.query.get(mp3.duplicate_of)
                if mp3 is not None:
                    resolved = mp3.sha1
            result.append(resolved)
        return result

    def get_fingerprint(self):
        """Return the stored fingerprint, or None if there is none."""
        # Also guard against the flag being set without a related row.
        if self.has_fingerprint and self.echoprint_fp is not None:
            return self.echoprint_fp.echoprint_fp
        return None

    def set_fingerprint(self, fp):
        """Store `fp` as this song's fingerprint and add it to the Session."""
        fpobj = Fingerprint.query.get(self.sha1)
        if not fpobj:
            fpobj = Fingerprint()
            fpobj.sha1 = self.sha1
            self.echoprint_fp = fpobj
        fpobj.echoprint_fp = fp
        self.has_fingerprint = True
        Session.add(fpobj)

    def get_features(self):
        """Return the related Features object, or None if not flagged."""
        if self.has_features:
            return self.features
        return None

    def set_features(self, **args):
        """Set each keyword argument as an attribute of the Features row.

        The Features object is created if missing and added to the Session.
        """
        fobj = self.features
        if not fobj:
            fobj = Features()
            fobj.sha1 = self.sha1
            self.features = fobj
        # items() rather than iteritems(): consistent with __init__ and
        # valid on both Python 2 and 3.
        for k, v in args.items():
            setattr(fobj, k, v)
        self.has_features = True
        Session.add(fobj)

    @classmethod
    def get_with_no_fingerprint(cls):
        """Query READY or BAD_METADATA songs that have no fingerprint."""
        return cls.query.filter(((cls.state == cls.READY)
                                 | (cls.state == cls.BAD_METADATA))
                                & (cls.has_fingerprint == 0))

    @classmethod
    def get_with_no_features(cls):
        """Query READY songs that have no features."""
        return cls.query.filter((cls.state == cls.READY)
                                & (cls.has_features == 0))

    @classmethod
    def get_with_bad_metadata(cls):
        """Query BAD_METADATA songs that do have a fingerprint."""
        return cls.query.filter_by(state=cls.BAD_METADATA,
                                   has_fingerprint=1)

    @classmethod
    def last_uploaded(cls, n=10):
        """Return the N last uploaded songs."""
        return cls.query.filter_by(state=cls.READY).order_by(
            desc(cls.uploaded_at)).limit(n)

    @classmethod
    def get_random_songs(cls, n=10, where_clause=None):
        """Return the SHA1 hashes of N random songs.

        `where_clause` restricts the candidates; it defaults to songs in
        the READY state. Returns an empty list when `n` is not positive or
        when no song matches. When fewer than `n` songs match, hashes are
        repeated to fill the list.
        """
        if where_clause is None:
            where_clause = (cls.state == cls.READY)
        num_songs = cls.query.filter(where_clause).count()
        if n <= 0 or not num_songs:
            # Nothing to pick; also avoids dividing by zero below.
            return []
        # Keep each row with probability n / num_songs, so that one pass
        # over the table selects about n rows.
        fraction = float(n) / num_songs
        sampled = where_clause & (func.rand() < fraction)
        results = []
        while len(results) < n:
            rows = Session.query(cls.sha1).filter(sampled).limit(n)
            results.extend(row[0] for row in rows)
        # The final pass can overshoot; return exactly n hashes.
        return results[:n]

    @classmethod
    def get_songs_for_album(cls, artist, album):
        """Query the READY songs with the given artist and album."""
        return cls.query.filter_by(
            state=cls.READY, artist=artist, album=album)

    @classmethod
    def never_played(cls, n=10):
        """Return N random songs that were never played."""
        return cls.get_random_songs(n, where_clause=(
            (cls.play_count == 0) & (cls.state == cls.READY)))

    @classmethod
    def uploads_by_day(cls, days=30):
        """Return the per-day upload counts for the last `days` days.

        Counts are ordered by ascending day. Days without uploads produce
        no entry, so the list may be shorter than `days`.
        """
        # select to_days(uploaded_at) as d, count(*) from mp3
        #   group by d order by d asc;
        date_limit = datetime.now() - timedelta(days)
        rows = Session.query(
            func.to_days(cls.uploaded_at).label('day'),
            func.count('*').label('count')
        ).filter(
            cls.uploaded_at > date_limit
        ).group_by('day').order_by(asc('day'))
        # Read the count by position: the attribute name `count` clashes
        # with the tuple method of the same name on result rows.
        return [row[1] for row in rows]
class PlayLog(Base):
    """One row per played song, with the songs played before it."""

    __tablename__ = 'playlog'

    id = Column(Integer(), primary_key=True)
    sha1 = Column(String(40))
    userid = Column(String(40), index=True)
    stamp = Column(DateTime())
    # Comma-separated SHA1 hashes of the previously played songs.
    prev = Column(Text())

    @classmethod
    def most_played(cls, n=10):
        """Return the N most played songs.

        The query yields (sha1, count) rows, most played first. The whole
        play log is taken into account: no time window is applied.
        """
        return Session.query(
            cls.sha1, func.count(cls.sha1).label('count')
        ).group_by(cls.sha1).order_by(desc('count')).limit(n)

    @classmethod
    def generate_tuples(cls, n=2):
        """Yield all the transitions in the playlog.

        Generates (target, (t-n, ... t-1)) tuples for each
        t[0..n] -> target transition registered in the playlog.

        The history part always holds n - 1 entries: it is left-padded
        with None when the log entry has fewer previous songs. All hashes
        are passed through MP3.deduplicate().
        """
        n -= 1                  # account for the target
        for plog in cls.query:
            if plog.prev:
                # NOTE(review): this keeps the FIRST n entries of `prev`;
                # confirm that matches the order in which `prev` is written.
                hashes = plog.prev.split(',')[:n]
                if len(hashes) < n:
                    hashes = ([None] * (n - len(hashes))) + hashes
            else:
                hashes = [None] * n
            target = MP3.deduplicate([plog.sha1])[0]
            yield (target, MP3.deduplicate(hashes))

    @classmethod
    def top_songs_for_user(cls, userid, days=30, n=10):
        """Return the top played songs for a user.

        Only plays from the last `days` days are counted. The query yields
        (sha1, count) rows, most played first, limited to `n` rows.
        """
        date_limit = datetime.now() - timedelta(days)
        return Session.query(
            cls.sha1, func.count(cls.sha1).label('count')
        ).filter(
            (cls.userid == userid) & (cls.stamp > date_limit)
        ).group_by(cls.sha1).order_by(desc('count')).limit(n)

    @classmethod
    def plays_by_day(cls, days=30):
        """Return the per-day play counts for the last `days` days.

        Counts are ordered by ascending day. Days without plays produce
        no entry, so the list may be shorter than `days`.
        """
        # select to_days(stamp) as d, count(*) from playlog
        #   group by d order by d asc;
        date_limit = datetime.now() - timedelta(days)
        rows = Session.query(
            func.to_days(cls.stamp).label('day'),
            func.count('*').label('count')
        ).filter(
            cls.stamp > date_limit
        ).group_by('day').order_by(asc('day'))
        # Read the count by position: the attribute name `count` clashes
        # with the tuple method of the same name on result rows.
        return [row[1] for row in rows]
class SearchLog(Base):
    """Record of a search query issued by a user."""

    __tablename__ = 'searchlog'
    __table_args__ = {'mysql_charset': 'utf8'}

    id = Column(Integer(), primary_key=True)
    userid = Column(String(40), index=True)
    # NOTE: on this class `query` is the column below, not the `query`
    # attribute that the other models use for lookups.
    query = Column(String(256))
    stamp = Column(DateTime())

    @classmethod
    def add(cls, query_str, userid):
        """Build a log entry for `query_str`, timestamped with the current time.

        The new object is returned; this method itself does not add it to
        the Session.
        """
        now = datetime.now()
        entry = cls(query=query_str, userid=userid, stamp=now)
        return entry