Commit 274da1a1 authored by ale

turned analyzer into a library; provided a global C wrapper for the API

parent 566b2af5
include $(top_srcdir)/vars.mk
SUBDIRS = ext immscore analyzer model
SUBDIRS = ext immscore analyzer model .
lib_LTLIBRARIES = libimms-c.la
libimms_c_la_SOURCES = \
imms-c.cc \
imms-c.h
libimms_c_la_LIBADD = $(ANALYZER_LIB) $(IMMSCORE_LIB) $(TORCH_LIB)
......@@ -4,8 +4,12 @@ noinst_LTLIBRARIES = libimmsanalyzer.la
bin_PROGRAMS = imms-analyzer
libimmsanalyzer_la_SOURCES = \
analyzer-params.h \
analyzer.cc \
analyzer.h \
beatkeeper.cc \
beatkeeper.h \
features.h \
fftprovider.cc \
fftprovider.h \
hanning.h \
......@@ -15,8 +19,7 @@ libimmsanalyzer_la_SOURCES = \
mfcckeeper.h
imms_analyzer_SOURCES = \
analyzer.cc \
analyzer.h
main.cc
imms_analyzer_LDADD = $(ANALYZER_LIB) $(IMMSCORE_LIB) $(TORCH_LIB)
#ifndef __analyzer_params_H
#define __analyzer_params_H
// Compile-time parameters shared by the analyzer sources.
// Every value below is a preprocessor constant; derived values are
// computed with integer arithmetic unless a cast says otherwise.

// Number of samples in one analysis window.
#define WINDOWSIZE 512
// Number of samples that consecutive windows have in common.
#define OVERLAP 256
// Number of new samples needed to advance by one window.
#define READSIZE (WINDOWSIZE - OVERLAP)
#define SAMPLERATE 22050 // half of 44100; what most music is encoded as
// Upper bound on frames: how many READSIZE-sample frames fit in
// 60*4 seconds (four minutes) of audio at SAMPLERATE.
#define MAXFRAMES ((SAMPLERATE*60*4)/READSIZE)
// Number of windows started per second of audio (integer division).
#define WINPERSEC (SAMPLERATE / (WINDOWSIZE - OVERLAP))
// important to take (WINDOWSIZE / 2) MFCCs
// NOTE(review): the value below is WINDOWSIZE / 2 + 1, i.e. one more than
// the comment above states -- confirm which of the two is intended.
#define NUMFREQS (WINDOWSIZE / 2 + 1)
// MAXFREQ has to be lower than the Nyquist frequency
// (i.e. lower than half the sampling rate).
// We can't measure frequencies faster than that with the given sampling rate.
// NOTE(review): as defined, MAXFREQ is exactly SAMPLERATE / 2 rather than
// strictly lower than it.
#define MAXFREQ (SAMPLERATE / 2)
// Frequency step: MAXFREQ divided by NUMFREQS (as float), passed to ROUND.
// ROUND is not defined in this header; it has to be visible wherever
// FREQDELTA or MINFREQ is expanded.
#define FREQDELTA ROUND(MAXFREQ / (float)NUMFREQS)
// Lowest frequency considered: one frequency step.
#define MINFREQ FREQDELTA
#endif
......@@ -18,7 +18,6 @@
*/
#include <errno.h>
#include <iostream>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
......@@ -26,16 +25,11 @@
#include <unistd.h>
#include <immsutil.h>
#include <appname.h>
#include <base64.h>
#include "analyzer.h"
#include "strmanip.h"
#include "melfilter.h"
#include "fftprovider.h"
#include "mfcckeeper.h"
#include "beatkeeper.h"
#include "hanning.h"
using std::cout;
using std::cerr;
......@@ -43,39 +37,8 @@ using std::endl;
typedef uint16_t sample_t;
const string AppName = ANALYZER_APP;
// Calculate acoustic stats for a song.
//
// Analyzer calculates the Beats Per Minute (BPM) and
// Mel-frequency cepstral coefficients (MFCC) for a song. These stats are used
// by IMMS to boost/penalize song transitions for songs that have
// similar/dissimilar acoustic characteristics - i.e. Analyzer helps IMMS
// match the 'mood'/theme of the next song to the previous one.
//
// BPM is a measure of how "fast" a song is;
// this is a valuable signal as slow and fast songs generally don't mix.
// MFCC is meant to capture the type of the song;
// i.e. what instruments are used, type of vocals, etc.
//
// As of IMMS 1.2 Analyzer is a separate application, and called as needed.
// Analyzer is an optional component; if not used IMMS will simply use its
// other sources to determine the next song.
// Acoustic analyzer for one input stream. Bundles the FFT providers, the
// mel filter bank and the Hanning window used by analyze().
class Analyzer
{
public:
    // The Hanning window is sized to one analysis window (WINDOWSIZE);
    // all other members are default-constructed.
    Analyzer()
        : hanwin(WINDOWSIZE)
    {
    }

    // Runs the analysis over the given stream and returns an int status.
    // Defined out of line.
    int analyze(FILE *);

protected:
    // Declaration order is construction order -- keep it unchanged.
    // NOTE(review): `wisdom` precedes the FFT providers; presumably they
    // need it constructed first -- confirm before reordering.
    FFTWisdom wisdom;
    FFTProvider<WINDOWSIZE> pcmfft;
    FFTProvider<NUMMEL> specfft;
    MelFilterBank mfbank;
    HanningWindow hanwin;
};
// Calculate acoustic stats for a song and write them to the database.
int Analyzer::analyze(FILE *p)
Features *Analyzer::analyze(FILE *p)
{
static const bool test_mode = 0;
......@@ -92,7 +55,7 @@ int Analyzer::analyze(FILE *p)
int r = fread(indata, sizeof(sample_t), OVERLAP, p);
if (r != OVERLAP)
return -5;
return NULL;
while (fread(indata + OVERLAP, sizeof(sample_t), READSIZE, p) ==
READSIZE && ++frames < MAXFRAMES)
......@@ -142,42 +105,11 @@ int Analyzer::analyze(FILE *p)
// did we read enough data?
if (test_mode || frames < 100)
return 0;
return NULL;
mfcckeeper.finalize();
beatkeeper.finalize();
// Output a JSON array with the result (binary data is encoded
// using base64).
std::string mfccbuf = mfcckeeper.get_result().serialize(),
beatsbuf = beatkeeper.serialize();
cout << "{" << endl
<< " mfcc: \""
<< base64::base64_encode((unsigned char *)mfccbuf.data(), mfccbuf.size())
<< "\"," << endl
<< " beats: \""
<< base64::base64_encode((unsigned char *)beatsbuf.data(), beatsbuf.size())
<< "\"" << endl
<< "}" << endl;
return 0;
return new Features(mfcckeeper.get_result(), beatkeeper.get_result());
}
// Entry point of the stand-alone analyzer: takes no arguments, reads the
// audio data from standard input and logs an error when analyze() reports
// a non-zero result.
int main(int argc, char *argv[])
{
    const bool has_arguments = argc > 1;
    if (has_arguments)
    {
        // Any argument is treated as a request for the usage text.
        cout << "usage: analyzer" << endl;
        cout << "File data must be passed on standard input as:" << endl;
        cout << " raw PCM, 16-bit unsigned, mono, " << SAMPLERATE << " Hz"
             << endl << endl;
        return -1;
    }

    // Raise our nice value by 15 so the analysis yields to other processes.
    nice(15);

    Analyzer analyzer;
    if (analyzer.analyze(stdin))
        LOG(ERROR) << "Could not process input data." << endl;
}
......@@ -19,23 +19,42 @@
#ifndef __ANALYZER_H
#define __ANALYZER_H
#define WINDOWSIZE 512
#define OVERLAP 256
#define READSIZE (WINDOWSIZE - OVERLAP)
#define SAMPLERATE 22050 // half of 44100; what most music is encoded as
#define MAXFRAMES ((SAMPLERATE*60*4)/READSIZE)
#define WINPERSEC (SAMPLERATE / (WINDOWSIZE - OVERLAP))
// important to take (WINDOWSIZE / 2) MFCCs
#define NUMFREQS (WINDOWSIZE / 2 + 1)
// MAXFREQ has to be lower than the Nyquist frequency
// (i.e. lower than half the sampling rate).
// We can't measure frequencies faster than that with the given sampling rate.
#define MAXFREQ (SAMPLERATE / 2)
#define FREQDELTA ROUND(MAXFREQ / (float)NUMFREQS)
#define MINFREQ FREQDELTA
#include <string>
#include <math.h>
#include "analyzer-params.h"
#include "melfilter.h"
#include "features.h"
#include "fftprovider.h"
#include "hanning.h"
// Calculate acoustic stats for a song.
//
// Analyzer calculates the Beats Per Minute (BPM) and
// Mel-frequency cepstral coefficients (MFCC) for a song. These stats are used
// by IMMS to boost/penalize song transitions for songs that have
// similar/dissimilar acoustic characteristics - i.e. Analyzer helps IMMS
// match the 'mood'/theme of the next song to the previous one.
//
// BPM is a measure of how "fast" a song is;
// this is a valuable signal as slow and fast songs generally don't mix.
// MFCC is meant to capture the type of the song;
// i.e. what instruments are used, type of vocals, etc.
//
// As of IMMS 1.2 Analyzer is a separate application, and called as needed.
// Analyzer is an optional component; if not used IMMS will simply use its
// other sources to determine the next song.
// Feature extractor for a single audio stream. Bundles the FFT providers,
// the mel filter bank and the Hanning window used by analyze().
class Analyzer
{
public:
    // The Hanning window is sized to one analysis window (WINDOWSIZE);
    // all other members are default-constructed.
    Analyzer()
        : hanwin(WINDOWSIZE)
    {
    }

    // Analyses the given stream. Returns a Features object allocated with
    // `new` (the caller is responsible for deleting it), or NULL when the
    // stream could not be analysed. Defined out of line.
    Features *analyze(FILE *);

protected:
    // Declaration order is construction order -- keep it unchanged.
    // NOTE(review): `wisdom` precedes the FFT providers; presumably they
    // need it constructed first -- confirm before reordering.
    FFTWisdom wisdom;
    FFTProvider<WINDOWSIZE> pcmfft;
    FFTProvider<NUMMEL> specfft;
    MelFilterBank mfbank;
    HanningWindow hanwin;
};
#endif
......@@ -185,9 +185,3 @@ float *BeatManager::get_result()
{
return lofreq.beats;
}
// Returns a verbatim byte copy of `lofreq.beats`, exactly ResultSize bytes
// long. No encoding or byte-order conversion is applied.
std::string BeatManager::serialize() const
{
    const char *raw_bytes = reinterpret_cast<const char *>(lofreq.beats);
    return std::string(raw_bytes, ResultSize);
}
......@@ -22,7 +22,7 @@
#include <string>
#include <vector>
#include "analyzer.h"
#include "analyzer-params.h"
#define MINBPM 50
#define MAXBPM 250
......@@ -66,7 +66,6 @@ public:
void finalize();
float *get_result();
std::string serialize() const;
static const int ResultSize = BEATSSIZE * sizeof(float);
protected:
......
#ifndef __imms_features_H
#define __imms_features_H
#include <memory>
#include <string.h>
#include "beatkeeper.h"
#include "mfcckeeper.h"
class Features {
public:
static const int MfccSerializedSize = MixtureModel::SerializedSize;
static const int BeatsSerializedSize = BEATSSIZE * sizeof(float);
static const int SerializedSize = MfccSerializedSize + BeatsSerializedSize;
Features() {}
Features(char *buf, int n)
: mm_((float*)buf) {
memcpy(beats_, buf + MfccSerializedSize, BeatsSerializedSize);
}
Features(const MixtureModel& mm, float *beats)
: mm_(mm) {
memcpy(beats_, beats, BeatsSerializedSize);
}
const MixtureModel& mfcc() const {
return mm_;
}
const float *beats() const {
return beats_;
}
float *beats() {
return beats_;
}
int serialize(char *buf, int n) {
if (n < SerializedSize)
return -1;
std::string mmbuf = mm_.serialize();
memcpy(buf, mmbuf.data(), MfccSerializedSize);
memcpy(buf + MfccSerializedSize, beats_, BeatsSerializedSize);
return 0;
}
private:
MixtureModel mm_;
float beats_[BEATSSIZE];
};
#endif
#include <iostream>
#include <sstream>
#include <string>
#include <immsutil.h>
#include <appname.h>
#include <base64.h>
#include "analyzer.h"
using std::cout;
using std::cerr;
using std::endl;
const string AppName = ANALYZER_APP;
std::string serialize_beats(const float *b) {
std::ostringstream o;
o.write((char *)b, BeatManager::ResultSize);
return o.str();
}
// Prints the features to standard output as a JSON-like object with two
// members, `mfcc` and `beats`. Each value is the base64 encoding of the
// corresponding binary data. Note that the member names are emitted
// without quotes.
void dump_features_json(Features *f) {
    const std::string mfcc_bytes = f->mfcc().serialize();
    const std::string beats_bytes = serialize_beats(f->beats());

    cout << "{" << endl;
    cout << " mfcc: \""
         << base64::base64_encode((unsigned char *)mfcc_bytes.data(), mfcc_bytes.size())
         << "\"," << endl;
    cout << " beats: \""
         << base64::base64_encode((unsigned char *)beats_bytes.data(), beats_bytes.size())
         << "\"" << endl;
    cout << "}" << endl;
}
int main(int argc, char *argv[])
{
if (argc > 1)
{
cout << "usage: analyzer" << endl
<< "File data must be passed on standard input as:" << endl
<< " raw PCM, 16-bit unsigned, mono, " << SAMPLERATE << " Hz"
<< endl << endl;
return -1;
}
nice(15);
Analyzer analyzer;
Features *f = analyzer.analyze(stdin);
if (!f)
LOG(ERROR) << "Could not process input data." << endl;
else
dump_features_json(f);
}
......@@ -23,6 +23,7 @@
#include <string>
using std::vector;
using std::string;
#define NUMMEL 40 // rough guess within guidelines,
// to match sampling rate but leave space for smoothing
......
......@@ -47,17 +47,18 @@ struct Gaussian
struct MixtureModel
{
static const int SerializedSize = Gaussian::SerializedSize * NUMGAUSS;
MixtureModel() {}
MixtureModel(Torch::DiagonalGMM &gmm)
{ init(gmm); }
MixtureModel(float* in);
MixtureModel &operator =(Torch::DiagonalGMM &gmm)
{ init(gmm); return *this; }
std::string serialize() const;
Gaussian gauss[NUMGAUSS];
static const int SerializedSize = Gaussian::SerializedSize * NUMGAUSS;
MixtureModel() {}
MixtureModel(Torch::DiagonalGMM &gmm) {
init(gmm);
}
MixtureModel(float* in);
MixtureModel &operator =(Torch::DiagonalGMM &gmm)
{ init(gmm); return *this; }
std::string serialize() const;
Gaussian gauss[NUMGAUSS];
private:
void init(Torch::DiagonalGMM &gmm);
void init(Torch::DiagonalGMM &gmm);
};
struct MFCCKeeperPrivate;
......
#include "analyzer.h"
#include "model.h"
#include "imms-c.h"
// Analyses the audio data readable from `in` and returns an opaque handle
// to the resulting features, to be released with imms_features_free().
// Returns NULL when `in` is NULL or the analysis produced no result.
imms_features_t imms_analyze(FILE *in) {
    // Callers of the C API may pass an unopened stream; do not hand a NULL
    // FILE* to the analyzer.
    if (!in)
        return NULL;
    Analyzer analyzer;
    return analyzer.analyze(in);
}
// Deserializes feature data from `buf`, which holds `n` valid bytes.
// Returns an opaque handle to be released with imms_features_free(), or
// NULL when `buf` is NULL or `n` is smaller than the serialized size of a
// feature set (Features::SerializedSize), so that a truncated buffer is
// rejected instead of being read past its end.
imms_features_t imms_features_from_data(char *buf, int n) {
    if (!buf || n < Features::SerializedSize)
        return NULL;
    return static_cast<imms_features_t>(new Features(buf, n));
}
// Returns the size in bytes of a serialized feature set. This is the name
// declared in imms-c.h, so it is the one C callers link against.
int imms_features_dump_size() {
    return Features::SerializedSize;
}

// Earlier name of imms_features_dump_size(), kept so that existing C++
// callers of this translation unit keep working.
int imms_features_data_size() {
    return imms_features_dump_size();
}
void imms_features_data(imms_features_t ptr_, char *buf, int n) {
Features *f = reinterpret_cast<Features*>(ptr_);
f->serialize(buf, n);
}
// Destroys the Features object behind the opaque handle `ptr_`.
void imms_features_free(imms_features_t ptr_) {
    delete static_cast<Features*>(ptr_);
}
// Allocates a new SVMSimilarityModel and returns it as an opaque handle.
imms_similarity_model_t imms_similarity_model_new() {
    return static_cast<imms_similarity_model_t>(new SVMSimilarityModel());
}
// Evaluates the model behind `ptr_` on two feature sets and returns the
// model's result. Each feature set contributes its MFCC mixture model and
// its beat data.
float imms_similarity_model_evaluate(imms_similarity_model_t ptr_,
                                     imms_features_t f1_,
                                     imms_features_t f2_) {
    SVMSimilarityModel &model = *static_cast<SVMSimilarityModel*>(ptr_);
    Features &first = *static_cast<Features*>(f1_);
    Features &second = *static_cast<Features*>(f2_);
    return model.evaluate(first.mfcc(), first.beats(),
                          second.mfcc(), second.beats());
}
// Destroys the SVMSimilarityModel behind the opaque handle `ptr_`.
void imms_similarity_model_free(imms_similarity_model_t ptr_) {
    delete static_cast<SVMSimilarityModel*>(ptr_);
}
#ifndef __imms_cint_H
#define __imms_cint_H

#include <stdio.h>

/*
 * C wrappers for the analysis/model API.
 *
 * The linkage specification is only emitted for C++ translation units so
 * that this header can also be included from plain C, and functions without
 * parameters are declared with (void) so that they are real prototypes in C.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* Opaque types. */
typedef void *imms_similarity_model_t;
typedef void *imms_features_t;

/* Analyze the input stream and return feature data. */
imms_features_t imms_analyze(FILE *);

/* Deserialize feature data from a buffer of the given length in bytes. */
imms_features_t imms_features_from_data(char *, int);

/* Returns the size of the serialized feature data, in bytes. */
int imms_features_dump_size(void);

/* Serialize feature data into the provided buffer of the given length. */
void imms_features_dump(imms_features_t, char *, int);

/* Free resources associated with feature data. */
void imms_features_free(imms_features_t);

/* Create a new similarity model. */
imms_similarity_model_t imms_similarity_model_new(void);

/* Evaluate the similarity between two feature sets. */
float imms_similarity_model_evaluate(imms_similarity_model_t, imms_features_t, imms_features_t);

/* Free resources associated with the similarity model. */
void imms_similarity_model_free(imms_similarity_model_t);

#ifdef __cplusplus
}
#endif

#endif
......@@ -5,8 +5,6 @@ AM_CPPFLAGS += -DWITH_TORCH
lib_LTLIBRARIES = libimmsmodel.la
libimmsmodel_la_SOURCES = \
cint.cc \
cint.h \
distance.cc \
distance.h \
emd.c \
......
#include "beatkeeper.h"
#include "mfcckeeper.h"
#include "model.h"
#include "cint.h"
const static int full_serialized_size = MixtureModel::SerializedSize + (sizeof(float) * BEATSSIZE);
// Allocates a new SVMSimilarityModel and returns it as an opaque handle.
opaque_similarity_model_t similarity_model_new() {
    return static_cast<opaque_similarity_model_t>(new SVMSimilarityModel());
}
// Evaluates the model behind `ptr_` on two songs and returns the model's
// result. `mm1data` / `mm2data` are the float data from which a
// MixtureModel is constructed for each song; `beats1` / `beats2` are passed
// to the model unchanged.
float similarity_model_evaluate(opaque_similarity_model_t ptr_, float *mm1data, float *beats1, float *mm2data, float *beats2) {
    MixtureModel first(mm1data);
    MixtureModel second(mm2data);
    return static_cast<SVMSimilarityModel*>(ptr_)->evaluate(first, beats1, second, beats2);
}
// Destroys the SVMSimilarityModel behind the opaque handle `ptr_`.
void similarity_model_free(opaque_similarity_model_t ptr_) {
    delete static_cast<SVMSimilarityModel*>(ptr_);
}
#ifndef __model_cint_H
#define __model_cint_H

/*
 * C interface to the similarity model.
 *
 * The linkage specification is only emitted for C++ translation units so
 * that this header can also be included from plain C, and the function
 * without parameters is declared with (void) so that it is a real prototype
 * in C.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handle to a similarity model. */
typedef void *opaque_similarity_model_t;

/* Create a new similarity model. */
opaque_similarity_model_t similarity_model_new(void);

/*
 * Evaluate the model on two songs. Arguments after the handle, in order:
 * mixture-model data and beat data of the first song, then mixture-model
 * data and beat data of the second song.
 */
float similarity_model_evaluate(opaque_similarity_model_t, float*, float*, float*, float*);

/* Free resources associated with the similarity model. */
void similarity_model_free(opaque_similarity_model_t);

#ifdef __cplusplus
}
#endif

#endif
......@@ -3,15 +3,16 @@ TORCH_LIB = $(top_builddir)/ext/torch3/libtorch.la
IMMSCORE_LIB = $(top_builddir)/immscore/libimmscore.la
ANALYZER_LIB = $(top_builddir)/analyzer/libimmsanalyzer.la
INCLUDES = \
INCS = \
-I$(top_srcdir)/ext/torch3/core \
-I$(top_srcdir)/ext/torch3/distributions \
-I$(top_srcdir)/ext/torch3/gradients \
-I$(top_srcdir)/ext/torch3/kernels \
-I$(top_srcdir)/immscore \
-I$(top_srcdir)/analyzer
-I$(top_srcdir)/analyzer \
-I$(top_srcdir)/model
AM_CPPFLAGS = @CPPFLAGS@ @XCPPFLAGS@ -Wall -fPIC -D_REENTRANT $(INCLUDES)
AM_CPPFLAGS = @CPPFLAGS@ @XCPPFLAGS@ -Wall -fPIC -D_REENTRANT $(INCS)
AM_CXXFLAGS = @CXXFLAGS@ -fno-rtti
AM_LDFLAGS = @LIBS@
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment