Skip to content
Snippets Groups Projects
Commit 85fc14c5 authored by ale's avatar ale
Browse files

Clean up duplicate files by moving them to a staging directory

parent d16e7dfb
No related branches found
No related tags found
No related merge requests found
......@@ -192,8 +192,15 @@ class DeDuper(object):
bitrate, duration, nmeta, sha1))
# Remove all the other songs.
songs_to_remove = [x for x in hashes if x != best_song]
songs_to_remove = set(x for x in hashes if x != best_song)
log.info('remove_dupes: songs to remove: %s' % (','.join(songs_to_remove)))
for mp3 in mp3s:
if mp3.sha1 not in songs_to_remove:
continue
# Mark the MP3 as a duplicate and move the associated file to the staging directory.
mp3.mark_as_duplicate(best_song)
Session.add(mp3)
Session.commit()
def run_deduper(db_url):
......
import os
import random
import shutil
from sqlalchemy.orm import deferred
from sqlalchemy import *
from datetime import datetime, timedelta
from djrandom.database import Base, Session
# Stage duplicate files to this directory, pending cleanup.
DUPLICATE_DIR = '/var/tmp/djrandom-duplicates'
class MP3(Base):
"""A single MP3.
......@@ -47,6 +52,17 @@ class MP3(Base):
data['duplicate_of'] = self.duplicate_of
return data
def mark_as_duplicate(self, duplicate_of):
    """Flag this MP3 as a duplicate and stage its file for cleanup.

    Sets ``self.state`` to ``self.DUPLICATE`` and stores ``duplicate_of``
    on the instance, then moves the file at ``self.path`` into
    ``DUPLICATE_DIR`` under the name ``self.sha1`` (creating the
    directory first if it does not exist).

    The file move is best-effort: a failure is logged with its traceback
    and is NOT raised, so the state/duplicate_of assignments always take
    effect.

    Args:
        duplicate_of: value recorded in ``self.duplicate_of``.
            NOTE(review): presumably the SHA1 of the copy being kept --
            confirm against callers.
    """
    # Local import: this module's top-level import block is not modified.
    import logging

    self.state = self.DUPLICATE
    self.duplicate_of = duplicate_of

    src = None
    try:
        src = self.path
        if not os.path.isdir(DUPLICATE_DIR):
            os.makedirs(DUPLICATE_DIR)
        shutil.move(src, os.path.join(DUPLICATE_DIR, self.sha1))
    except Exception:
        # Keep the original best-effort semantics, but stop hiding the
        # failure: a bare ``except: pass`` also swallowed
        # KeyboardInterrupt/SystemExit and left no trace of files that
        # could not be staged.
        logging.getLogger(__name__).exception(
            'mark_as_duplicate: could not move %r to %s',
            src, DUPLICATE_DIR)
@classmethod
def last_uploaded(cls, n=10):
"""Return the N last uploaded songs."""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment