diff --git a/server/djrandom/mood/feature_extraction.py b/server/djrandom/mood/feature_extraction.py index 1818608acf49ea3ce1aca20e5870eed85c47bbb7..4aec137e5529412009a51d9aa0fc4719887f34a2 100644 --- a/server/djrandom/mood/feature_extraction.py +++ b/server/djrandom/mood/feature_extraction.py @@ -76,6 +76,7 @@ def _vector_from_file(path): tnet.updControl('mrs_natural/inSamples', factor * 512) tnet.tick() + return tnet.getControl('mrs_realvec/processedData').to_realvec() @@ -84,26 +85,13 @@ def vector_from_file(path): return _vector_from_file(mp3_path) -if __name__ == '__main__': - import sys, time, marsyas_utils +def main(): + import sys, marsyas_utils result = vector_from_file(sys.argv[1]) - - print str(result)[:512] - - print 'serialization benchmark...' - - n = 5000 - start = time.time() - for i in xrange(n): - rstr = marsyas_utils.serialize_realvec(result) - end = time.time() - print 'serialization speed: %g iter/sec' % (n / (end - start)) - - start = time.time() - for i in xrange(n): - result2 = marsyas_utils.deserialize_realvec(rstr) - end = time.time() - print 'deserialization speed: %g iter/sec' % (n / (end - start)) + rstr = marsyas_utils.serialize_realvec(result) + sys.stdout.write(rstr) +if __name__ == '__main__': + main() diff --git a/server/djrandom/mood/mood_scanner.py b/server/djrandom/mood/mood_scanner.py index ba1afe575376019890c9f700f4ca4e72d308c77b..18515d67989f097743d72c90ae02332427b73c6b 100644 --- a/server/djrandom/mood/mood_scanner.py +++ b/server/djrandom/mood/mood_scanner.py @@ -9,19 +9,24 @@ from djrandom import utils from djrandom.model.mp3 import MP3 from djrandom.database import Session, init_db from djrandom.model import processor -from djrandom.mood import feature_extraction -from djrandom.mood import marsyas_utils log = logging.getLogger(__name__) +# We run extract_features as an external program because Marsyas has +# a nasty tendency to leak memory during analysis... 
+def get_features(path):
+    """Return the feature extractor's stdout for path; raise on failure."""
+    # check_output() raises CalledProcessError on a non-zero exit status.
+    return subprocess.check_output(['djrandom-mood-extract-features', path])
+
+
 class TimbreFeatureExtractor(processor.Processor):
 
     def process(self, mp3):
         log.info('extracting features from %s' % mp3.sha1)
         try:
-            timbre_vector = feature_extraction.vector_from_file(mp3.path)
-            vector_str = marsyas_utils.serialize_realvec(timbre_vector)
+            vector_str = get_features(mp3.path)
         except Exception, e:
             log.error('error processing %s: %s' % (mp3.sha1, e))
             return
diff --git a/server/setup.py b/server/setup.py
index 4f06ca9712c0061a9bc536257a421005b4dc4e0b..a2a657a27ba03cde7bd43eb904c05698f3ce10a7 100644
--- a/server/setup.py
+++ b/server/setup.py
@@ -34,6 +34,7 @@ setup(
             "djrandom-solr-fixer = djrandom.model.verify:main",
             "djrandom-mood-scanner = djrandom.mood.mood_scanner:main",
             "djrandom-mood-db = djrandom.mood.mood_db:main",
+            "djrandom-mood-extract-features = djrandom.mood.feature_extraction:main",
         ],
     },
 )