From fe6c8c0f226e93ebad8fdcb2244e881e9b0ab7ef Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Fri, 20 Jan 2012 22:24:37 +0000
Subject: [PATCH] in the album art cache, when the album name ends with a
 number, try querying last.fm again without the number at the end

---
 server/djrandom/model/external.py | 36 ++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/server/djrandom/model/external.py b/server/djrandom/model/external.py
index 3eb6e46..2708dc6 100644
--- a/server/djrandom/model/external.py
+++ b/server/djrandom/model/external.py
@@ -1,5 +1,6 @@
 import hashlib
 import os
+import re
 import subprocess
 import urllib
 import urllib2
@@ -12,6 +13,12 @@ class AlbumImageDiskCache(object):
 
     Files are saved and converted to JPEG using Imagemagick.  Negative
     matches are saved as empty files.
+
+    If you want to periodically retry 'missed' entries (to recover from
+    temporary errors, for example), you can simply run:
+
+        find $DIR -type f -size 0 -mtime +$DAYS -exec rm -f \{\} +
+
     """
 
     def __init__(self, root):
@@ -61,16 +68,25 @@ class AlbumImageRetriever(object):
 
     def get_album_image(self, artist, album):
         if not self.cache.has(artist, album):
-            try:
-                xml = self._get_album_info(artist, album)
-                xp = etree.XPath('album/image[@size="extralarge"]')
-                img = xp(xml)
-                if img:
-                    self.cache.download(artist, album, img[0].text)
-                else:
-                    self.cache.set_negative_match(artist, album)
-            except:
-                return None
+            queries = [(artist, album)]
+            # Work around a common ID3 tagging habit: an album name that
+            # ends in a number may be one volume of a series, so if the
+            # first lookup raises, retry without the trailing number.
+            m = re.search(r'^(.+) \d+$', album)
+            if m:
+                queries.append((artist, m.group(1)))
+            for query_artist, query_album in queries:
+                try:
+                    xml = self._get_album_info(query_artist, query_album)
+                    xp = etree.XPath('album/image[@size="extralarge"]')
+                    img = xp(xml)
+                    if img:
+                        self.cache.download(artist, album, img[0].text)
+                    else:
+                        self.cache.set_negative_match(artist, album)
+                except:
+                    continue
+                break
         return self.cache.get(artist, album)
 
 
-- 
GitLab