Skip to content
Snippets Groups Projects
Select Git revision
  • 98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c
  • master default protected
  • better-queue
3 results

crawler_test.go

Blame
    • ale's avatar
      4cd67e72
      Add tags (primary/related) to links · 4cd67e72
      ale authored
      This change allows more complex scope boundaries, including loosening
      edges a bit to include related resources of HTML pages (which makes
      for more complete archives if desired).
      4cd67e72
      History
      Add tags (primary/related) to links
      ale authored
      This change allows more complex scope boundaries, including loosening
      edges a bit to include related resources of HTML pages (which makes
      for more complete archives if desired).
    external.py 2.46 KiB
    import hashlib
    import os
    import subprocess
    import urllib
    import urllib2
    from lxml import etree
    from djrandom import utils
    
    
    class AlbumImageDiskCache(object):
        """Cache album art on disk.

        Files are saved and converted to JPEG using Imagemagick.  Negative
        matches are saved as empty files.

        Every (artist, album) pair maps to one file whose location is
        derived from the SHA-1 digest of ``artist|album``.
        """

        def __init__(self, root):
            """Create a cache that stores its files below `root`."""
            self.root = root

        @staticmethod
        def _to_bytes(value):
            """Return `value` as a byte string, encoding text as UTF-8."""
            if isinstance(value, bytes):
                return value
            return (u'%s' % (value,)).encode('utf-8')

        def _path(self, artist, album):
            """Return the cache file path for the (artist, album) pair.

            The key is hashed as bytes: hashing a text string directly fails
            for non-ASCII unicode on Python 2 and for any str on Python 3.
            Byte strings and ASCII-only text produce the same digest as
            before, so existing cache entries stay reachable.
            """
            key = b'|'.join([self._to_bytes(artist), self._to_bytes(album)])
            album_hash = hashlib.sha1(key).hexdigest()
            # NOTE(review): generate_path() is defined in djrandom.utils;
            # presumably it also makes sure the parent directory exists --
            # confirm, since the writers below open the path directly.
            return utils.generate_path(self.root, album_hash)

        def has(self, artist, album):
            """Return True if the cache holds an entry (image or negative)."""
            return os.path.exists(self._path(artist, album))

        def get(self, artist, album):
            """Return the path of the cached image, or None if there is none.

            None is returned for negative matches (empty files) and also for
            entries that are missing altogether, so a file that disappears
            between has() and get() does not raise.
            """
            path = self._path(artist, album)
            try:
                size = os.path.getsize(path)
            except OSError:
                return None
            if size == 0:
                return None
            return path

        def set_negative_match(self, artist, album):
            """Record that no image exists by creating an empty cache file."""
            with open(self._path(artist, album), 'w'):
                pass

        def download(self, artist, album, url):
            """Download url and convert to JPEG.

            The data is written to a temporary file next to the final path
            and converted with /usr/bin/convert; the temporary file is
            removed whether or not the conversion succeeds.

            Raises:
              subprocess.CalledProcessError: if convert exits with a non-zero
                status (previously the failure was silently ignored).
              Any exception raised by urllib2.urlopen() or by file I/O.
            """
            path = self._path(artist, album)
            tmpf = path + '.tmp'
            # Fetch everything first, so that a failed download does not
            # leave a stray temporary file behind.
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
            try:
                # Image data is binary: text mode would corrupt it on
                # platforms that translate line endings.
                with open(tmpf, 'wb') as fd:
                    fd.write(data)
                subprocess.check_call(
                    ['/usr/bin/convert', tmpf, '-quality', '75',
                     'jpeg:%s' % path])
            finally:
                if os.path.exists(tmpf):
                    os.unlink(tmpf)
    
    
    class AlbumImageRetriever(object):
        """Look up album cover art and keep the results in a disk cache.

        Covers are located with the ``album.getInfo`` method of the
        Audioscrobbler web service (ws.audioscrobbler.com) and stored through
        an AlbumImageDiskCache.
        """

        def __init__(self, api_key, cache_root):
            """Store the web service API key and open the cache at cache_root."""
            self.api_key = api_key
            self.cache = AlbumImageDiskCache(cache_root)

        @staticmethod
        def _utf8(value):
            """Return `value` as a byte string, encoding text as UTF-8."""
            if isinstance(value, bytes):
                return value
            return (u'%s' % (value,)).encode('utf-8')

        def _get_album_info(self, artist, album):
            """Call album.getInfo and return the parsed XML root element.

            Raises whatever urllib2.urlopen() or etree.fromstring() raise on
            network or parse errors.
            """
            args = {'method': 'album.getInfo',
                    'artist': artist,
                    'album': album,
                    'api_key': self.api_key}
            # Python 2's urlencode() calls str() on every value, which raises
            # UnicodeEncodeError for non-ASCII unicode objects: encode text
            # as UTF-8 up front and leave byte strings untouched.
            query = urllib.urlencode(dict(
                (name, self._utf8(value)) for name, value in args.items()))
            response = urllib2.urlopen(
                'http://ws.audioscrobbler.com/2.0/?%s' % query)
            try:
                return etree.fromstring(response.read())
            finally:
                response.close()

        def get_album_image(self, artist, album):
            """Return the path of the album cover, or None if unavailable.

            On a cache miss the web service is queried and the result is
            cached: either the downloaded "extralarge" image or a negative
            match.  The lookup is best-effort, so any error while querying or
            downloading yields None without caching anything, and the lookup
            is retried on the next call.
            """
            if not self.cache.has(artist, album):
                try:
                    xml = self._get_album_info(artist, album)
                    xp = etree.XPath('album/image[@size="extralarge"]')
                    img = xp(xml)
                    # An <image> element without text carries no URL; treat
                    # it like a missing element instead of passing None on
                    # to download().
                    url = (img[0].text or '').strip() if img else ''
                    if url:
                        self.cache.download(artist, album, url)
                    else:
                        self.cache.set_negative_match(artist, album)
                except Exception:
                    # Not a bare "except:", so that KeyboardInterrupt and
                    # SystemExit still propagate.
                    return None
            try:
                return self.cache.get(artist, album)
            except OSError:
                # The cache file is gone (e.g. it was removed concurrently).
                return None
    
    
    if __name__ == '__main__':
        # Command-line entry point: print the cached cover path (or None)
        # for the given album.
        import sys
        # Exactly three arguments are required; show a usage message rather
        # than failing with an unpacking ValueError traceback.
        if len(sys.argv) != 4:
            sys.stderr.write(
                'Usage: %s <api_key> <artist> <album>\n' % sys.argv[0])
            sys.exit(2)
        api_key, artist, album = sys.argv[1:]
        air = AlbumImageRetriever(api_key, '/var/tmp/album-image-cache')
        # The parenthesised single-argument form is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(air.get_album_image(artist, album))