import contextlib
import hashlib
import logging
import os
import re
import subprocess
import urllib
import urllib2

from lxml import etree

from djrandom import utils

log = logging.getLogger(__name__)
# Library convention: stay quiet unless the application configures logging.
log.addHandler(logging.NullHandler())

# Text (unicode) string type of the running interpreter.
_TEXT_TYPE = type(u'')

# Matches album names that end in a number ("Greatest Hits 2"); group 1 is
# the name without the trailing number.
_NUMBERED_ALBUM_RE = re.compile(r'^(.+) \d+$')

# Selects the extra-large cover image from an album.getInfo response.
_IMAGE_XPATH = etree.XPath('album/image[@size="extralarge"]')


def _utf8(value):
    """Encode text strings as UTF-8; return any other value unchanged."""
    if isinstance(value, _TEXT_TYPE):
        return value.encode('utf-8')
    return value


class AlbumImageDiskCache(object):
    r"""Cache album art on disk.

    Files are saved and converted to JPEG using Imagemagick. Negative
    matches are saved as empty files. If you want to periodically
    retry 'missed' entries (to recover from temporary errors, for
    example), you can simply run:

      find $DIR -type f -size 0 -mtime +$DAYS -exec rm -f \{\} +

    """

    def __init__(self, root):
        """Create a cache that stores its files below `root`."""
        self.root = root

    def _path(self, artist, album):
        """Return the cache file path for the given artist/album pair."""
        # Encode before hashing: sha1() only accepts bytes, and the implicit
        # ASCII conversion fails on non-ASCII unicode tags. Pure-ASCII input
        # hashes exactly as before, so existing cache entries remain valid.
        key = '%s|%s' % (_utf8(artist), _utf8(album))
        album_hash = hashlib.sha1(key).hexdigest()
        return utils.generate_path(self.root, album_hash)

    def has(self, artist, album):
        """Return True if a cache entry (image or negative match) exists."""
        return os.path.exists(self._path(artist, album))

    def get(self, artist, album):
        """Return the path of the cached image, or None.

        None is returned both for negative matches (empty files) and when
        there is no cache entry at all.
        """
        path = self._path(artist, album)
        try:
            if os.path.getsize(path) == 0:
                return None
        except OSError:
            # No entry (or an unreadable one): treat it as a miss.
            return None
        return path

    def set_negative_match(self, artist, album):
        """Record that no image is available by creating an empty file."""
        with open(self._path(artist, album), 'w'):
            pass

    def download(self, artist, album, url):
        """Download url and convert to JPEG.

        Returns True if the converted image was stored in the cache, False
        if the conversion failed (no cache entry is left behind in that
        case). Download and file errors propagate to the caller.
        """
        path = self._path(artist, album)
        tmpf = path + '.tmp'
        try:
            with contextlib.closing(urllib2.urlopen(url)) as response:
                data = response.read()
            # Image data is binary: never write it through a text-mode file.
            with open(tmpf, 'wb') as fd:
                fd.write(data)
            status = subprocess.call(
                ['/usr/bin/convert', tmpf, '-quality', '75',
                 'jpeg:%s' % path])
        finally:
            # Always remove the temporary file, even when something failed.
            try:
                os.unlink(tmpf)
            except OSError:
                pass
        if status != 0:
            log.warning('convert exited with status %d for %s', status, url)
            # Do not leave a truncated or empty output file behind: an empty
            # file would be mistaken for a negative match.
            try:
                os.unlink(path)
            except OSError:
                pass
            return False
        return True


class AlbumImageRetriever(object):
    """Look up album cover art via the audioscrobbler web service.

    Results, including negative ones, are stored in an
    AlbumImageDiskCache.
    """

    def __init__(self, api_key, cache_root):
        """Store the API key and create the disk cache under `cache_root`."""
        self.api_key = api_key
        self.cache = AlbumImageDiskCache(cache_root)

    def _get_album_info(self, artist, album):
        """Call album.getInfo and return the parsed XML response root."""
        # urlencode() cannot handle non-ASCII unicode values, so encode the
        # tag-derived fields first.
        args = {'method': 'album.getInfo',
                'artist': _utf8(artist),
                'album': _utf8(album),
                'api_key': self.api_key}
        url = 'http://ws.audioscrobbler.com/2.0/?%s' % urllib.urlencode(args)
        with contextlib.closing(urllib2.urlopen(url)) as response:
            return etree.fromstring(response.read())

    def get_album_image(self, artist, album):
        """Return the local path of the album cover, or None.

        The image is fetched and cached on first use. Lookup errors are
        logged and never raised; since nothing is cached for them, the
        lookup is attempted again on the next call.
        """
        if not self.cache.has(artist, album):
            queries = [(artist, album)]
            # Fix a minor annoyance that is popular in ID3 tags: if the
            # album name ends in a number, it might be part of a series;
            # in that case, try again without the number.
            m = _NUMBERED_ALBUM_RE.search(album)
            if m:
                queries.append((artist, m.group(1)))
            for query_artist, query_album in queries:
                try:
                    xml = self._get_album_info(query_artist, query_album)
                    img = _IMAGE_XPATH(xml)
                    if img:
                        stored = self.cache.download(
                            artist, album, img[0].text)
                        if not stored:
                            # Conversion failed: try the next query.
                            continue
                    else:
                        self.cache.set_negative_match(artist, album)
                except Exception:
                    # Best effort: log and try the next query. Unlike a bare
                    # "except:", this lets KeyboardInterrupt and SystemExit
                    # through.
                    log.warning('Album art lookup failed for %r / %r',
                                query_artist, query_album, exc_info=True)
                    continue
                break
        return self.cache.get(artist, album)


if __name__ == '__main__':
    import sys
    if len(sys.argv) != 4:
        sys.exit('Usage: %s API_KEY ARTIST ALBUM' % sys.argv[0])
    api_key, artist, album = sys.argv[1:]
    air = AlbumImageRetriever(api_key, '/var/tmp/album-image-cache')
    print(air.get_album_image(artist, album))