Commit 07ca7aa8 authored by ale's avatar ale

initial commit

parents
SUBDIRS = immscore analyzer model
include $(top_srcdir)/vars.mk
bin_PROGRAMS = imms-analyzer
imms_analyzer_SOURCES = \
analyzer.cc \
analyzer.h \
beatkeeper.cc \
beatkeeper.h \
fftprovider.cc \
fftprovider.h \
hanning.h \
melfilter.cc \
melfilter.h \
mfcckeeper.cc \
mfcckeeper.h
imms_analyzer_LDADD = ../immscore/libimmscore.a
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <errno.h>
#include <iostream>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <string.h>
#include <unistd.h>
#include <immsutil.h>
#include <appname.h>
#include <base64.h>
#include "analyzer.h"
#include "strmanip.h"
#include "melfilter.h"
#include "fftprovider.h"
#include "mfcckeeper.h"
#include "beatkeeper.h"
#include "hanning.h"
using std::cout;
using std::cerr;
using std::endl;
// Type of one PCM sample as read from the input stream. main()'s usage text
// describes the expected input as raw 16-bit unsigned mono PCM.
typedef uint16_t sample_t;
// Application name, taken from the ANALYZER_APP macro.
const string AppName = ANALYZER_APP;
// Calculate acoustic stats for a song.
//
// Analyzer calculates the Beats Per Minute (BPM) and
// Mel-frequency cepstral coefficients (MFCC) for a song. These stats are used
// by IMMS to boost/penalize song transitions for songs that have
// similar/dissimilar acoustic characteristics - i.e. Analyzer helps IMMS
// match the 'mood'/theme of the next song to the previous one.
//
// BPM is a measure of how "fast" a song is;
// this is a valuable signal as slow and fast songs generally don't mix.
// MFCC is meant to capture the type of the song;
// i.e. what instruments are used, type of vocals, etc.
//
// As of IMMS 1.2 Analyzer is a separate application, and called as needed.
// Analyzer is an optional component; if not used IMMS will simply use its
// other sources to determine the next song.
class Analyzer
{
public:
// Sizes the Hanning window to WINDOWSIZE, the length of one analysis window.
Analyzer() : hanwin(WINDOWSIZE) { }
// Reads PCM samples from the given stream and prints the computed stats to
// standard output. Returns -5 when the initial read comes up short and 0
// otherwise (including when too few frames were read to print a result).
int analyze(FILE*);
protected:
// NOTE(review): members are constructed in declaration order, so the wisdom
// is loaded before the FFT providers are built -- presumably so that their
// plans can benefit from it. Confirm before reordering these members.
FFTWisdom wisdom;
// FFT over one window of PCM samples.
FFTProvider<WINDOWSIZE> pcmfft;
// Second FFT, applied to the log mel energies.
FFTProvider<NUMMEL> specfft;
// Filter bank applied to the power spectrum.
MelFilterBank mfbank;
// Window function applied to the samples before the PCM FFT.
HanningWindow hanwin;
};
// Calculate acoustic stats for a song and print them to standard output.
//
// Reads raw PCM samples (sample_t) from `p` in windows of WINDOWSIZE samples
// that advance by READSIZE samples per frame, keeping OVERLAP samples from
// the previous window. Each window is Hanning-windowed and transformed into a
// power spectrum, which is run through the mel filter bank. The mel energies
// feed the beat keeper; their logarithms are transformed once more to obtain
// the cepstral coefficients handed to the MFCC keeper.
//
// Returns -5 when the stream does not even hold the first OVERLAP samples,
// and 0 otherwise. A result is only printed when the frame counter reached
// at least 100 (and test_mode is off); with fewer frames the function
// returns 0 without printing anything.
int Analyzer::analyze(FILE *p)
{
    static const bool test_mode = 0;
    StackTimer t;
    size_t frames = 0;
    sample_t indata[WINDOWSIZE];
    vector<double> outdata(NUMFREQS);
    MFCCKeeper mfcckeeper;
    BeatManager beatkeeper;

    // Prime the front of the buffer; every iteration below only reads the
    // READSIZE samples that follow the overlap region. fread() returns a
    // size_t, so compare it directly instead of narrowing it to int.
    if (fread(indata, sizeof(sample_t), OVERLAP, p) != OVERLAP)
        return -5;

    while (fread(indata + OVERLAP, sizeof(sample_t), READSIZE, p) ==
            READSIZE && ++frames < MAXFRAMES)
    {
        // calculate MFCCs:
        for (int i = 0; i < WINDOWSIZE; ++i)
            pcmfft.input()[i] = (double)indata[i];

        // window the data
        hanwin.apply(pcmfft.input(), WINDOWSIZE);

        // fft to get the spectrum
        pcmfft.execute();

        // calculate the power spectrum: |re + i*im|^2 = re^2 + im^2.
        // Plain multiplication avoids a general pow() call per bin.
        for (int i = 0; i < NUMFREQS; ++i)
        {
            const double re = pcmfft.output()[i][0];
            const double im = pcmfft.output()[i][1];
            outdata[i] = re * re + im * im;
        }

        // apply mel filter bank
        vector<double> melfreqs;
        mfbank.apply(outdata, melfreqs);

        // the beat tracker works on the linear (pre-log) mel energies
        beatkeeper.process(melfreqs);

        // compute log energy
        // NOTE(review): a mel energy of exactly zero yields -inf here; no
        // guard is applied.
        for (int i = 0; i < NUMMEL; ++i)
            melfreqs[i] = log(melfreqs[i]);

        // another fft to get the MFCCs
        specfft.apply(melfreqs);

        // discard the first mfcc: output bins 1..NUMCEPSTR are kept
        float cepstrum[NUMCEPSTR];
        for (int i = 1; i <= NUMCEPSTR; ++i)
            cepstrum[i - 1] = specfft.output()[i][0];

        mfcckeeper.process(cepstrum);

        // finally shift the already read data so that the last OVERLAP
        // samples become the start of the next window
        memmove(indata, indata + READSIZE, OVERLAP * sizeof(sample_t));
    }

#ifdef DEBUG
    cerr << "obtained " << frames << " frames" << endl;
#endif

    // did we read enough data?
    if (test_mode || frames < 100)
        return 0;

    mfcckeeper.finalize();
    beatkeeper.finalize();

    // Output a JSON-style object with the result (binary data is encoded
    // using base64).
    // NOTE(review): the keys are emitted without quotes, so strict JSON
    // parsers will reject this output. The format is left unchanged because
    // its consumers are not visible from here.
    std::string mfccbuf = mfcckeeper.get_result().serialize(),
        beatsbuf = beatkeeper.serialize();
    cout << "{" << endl
         << " mfcc: \""
         << base64::base64_encode((unsigned char *)mfccbuf.data(), mfccbuf.size())
         << "\"," << endl
         << " beats: \""
         << base64::base64_encode((unsigned char *)beatsbuf.data(), beatsbuf.size())
         << "\"" << endl
         << "}" << endl;
    return 0;
}
// Entry point: analyzes raw PCM data supplied on standard input.
//
// The program takes no arguments; passing any argument prints the usage text
// and exits with -1. The exit status is 0 when analyze() reports success and
// 1 when the input could not be processed, so that callers can detect a
// failure from the exit status and not only from the log.
int main(int argc, char *argv[])
{
    if (argc > 1)
    {
        cout << "usage: analyzer" << endl
             << "File data must be passed on standard input as:" << endl
             << " raw PCM, 16-bit unsigned, mono, " << SAMPLERATE << " Hz"
             << endl << endl;
        return -1;
    }

    // Lower our scheduling priority. This is best effort: the result is
    // deliberately not checked.
    nice(15);

    Analyzer analyzer;
    if (analyzer.analyze(stdin))
    {
        LOG(ERROR) << "Could not process input data." << endl;
        return 1;
    }
    return 0;
}
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __ANALYZER_H
#define __ANALYZER_H
// Number of PCM samples in one analysis window.
#define WINDOWSIZE 512
// Number of samples carried over from one window into the next.
#define OVERLAP 256
// Number of new samples read for each window.
#define READSIZE (WINDOWSIZE - OVERLAP)
// Expected input sample rate in Hz: half of 44100, the rate most music is
// encoded at.
#define SAMPLERATE 22050
// Upper bound on the number of analyzed frames: the number of
// READSIZE-sample steps in four minutes of audio at SAMPLERATE.
#define MAXFRAMES ((SAMPLERATE*60*4)/READSIZE)
// Analysis windows per second of audio (integer division).
#define WINPERSEC (SAMPLERATE / (WINDOWSIZE - OVERLAP))
// Number of spectrum bins used per window: WINDOWSIZE / 2 + 1.
// The original note here reads "important to take (WINDOWSIZE / 2) MFCCs".
#define NUMFREQS (WINDOWSIZE / 2 + 1)
// MAXFREQ must not exceed the Nyquist frequency
// (i.e. half the sampling rate).
// We can't measure frequencies faster than that with the given sampling rate.
#define MAXFREQ (SAMPLERATE / 2)
// Frequency step obtained by dividing MAXFREQ by NUMFREQS, rounded.
#define FREQDELTA ROUND(MAXFREQ / (float)NUMFREQS)
// The lowest frequency is one frequency step.
#define MINFREQ FREQDELTA
#endif
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <string.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <map>
#include "beatkeeper.h"
#include "immsutil.h"
using std::string;
using std::endl;
using std::vector;
using std::map;
using std::cerr;
// Grab some info about peaks of beats (where the most beats are located)?
// TODO: this function isn't currently used;
// it's an experiment to see if it helps improve IMMS results.
// Do we want to reactivate or continue testing it at some point?
bool BeatKeeper::extract_features(float *beats, vector<float> &features)
{
float sum = 0, min = 1e100, max = 0;
for (int i = 0; i < BEATSSIZE; ++i)
{
if (beats[i] < min)
min = beats[i];
if (beats[i] > max)
max = beats[i];
sum += beats[i];
}
if (max == 0 || max == min)
return false;
features.push_back(max); // max
features.push_back(min / max); // relative min
features.push_back(sum / ((max - min) * BEATSSIZE)); // relative area
map<int, int> allpeaks;
float cutoff = max - (max - min) * 0.2;
float localmax = 0;
int maxindex = 0, width = 0;
for (int i = BEATSSIZE - 1; i >= 0; --i)
{
if (beats[i] < cutoff)
{
if (!width)
continue;
int realwidth = OFFSET2BPM(i - width) - OFFSET2BPM(i);
allpeaks[realwidth] = OFFSET2BPM(maxindex);
localmax = 0;
width = 0;
continue;
}
if (beats[i] > localmax)
{
localmax = beats[i];
maxindex = i;
}
++width;
}
map<int, int> peaks;
map<int, int>::reverse_iterator i = allpeaks.rbegin();
for (int j = 0; j < 3; ++j, ++i)
{
if (i == allpeaks.rend())
break;
peaks[i->first] = i->second;
}
int first_peak = 0;
for (map<int, int>::iterator i = peaks.begin(); i != peaks.end(); ++i)
{
if (!first_peak)
features.push_back(first_peak = i->second);
else
features.push_back(i->second / (float)first_peak);
features.push_back(beats[i->second] / max);
features.push_back(i->first);
}
for (int i = peaks.size(); i < 3; ++i)
{
features.push_back(0);
features.push_back(0);
features.push_back(0);
}
#if defined(DEBUG) && 0
cerr << "Kept peaks" << endl;
for (map<int, int>::iterator i = peaks.begin(); i != peaks.end(); ++i)
cerr << " -> @ " << i->second << " = " << i->first << endl;
#endif
return true;
}
// Return the keeper to its initial state: no samples, an all-zero sample
// buffer and an all-zero beat histogram.
void BeatKeeper::reset()
{
    memset(beats, 0, sizeof(beats));
    memset(data, 0, sizeof(data));
    samples = 0;

    // The first half of data is written first; the second half serves as
    // the (still empty) previous window.
    last_window = data + MAXBEATLENGTH;
    current_window = data;
    current_position = data;
}
// Dump debug data on the beats.
void BeatKeeper::dump(const string &filename)
{
std::ofstream bstats(filename.c_str(), std::ios::trunc);
for (int i = 0; i < BEATSSIZE; ++i)
bstats << OFFSET2BPM(i) << " " << ROUND(beats[i]) << endl;
bstats.close();
}
// Append one power value to the window being filled; once the window holds
// MAXBEATLENGTH values, fold it into the beat histogram.
void BeatKeeper::process(float power)
{
    *current_position = power;
    ++current_position;

    const bool window_full =
        (current_position - current_window) == MAXBEATLENGTH;
    if (window_full)
        process_window();
}
// Accumulate the auto-correlation of the signal into beats[].
//
// Every sample of the previous window is multiplied with the sample that
// lies `offset` positions later, for each offset from MINBEATLENGTH up to
// (but excluding) MAXBEATLENGTH. The later sample comes from the previous
// window itself or, past its end, from the window that was just filled.
// Peaks in the accumulated products indicate the beat periods.
void BeatKeeper::process_window()
{
    // update beat values
    for (int i = 0; i < MAXBEATLENGTH; ++i)
    {
        for (int offset = MINBEATLENGTH; offset < MAXBEATLENGTH; ++offset)
        {
            const int later = i + offset;
            float partner;
            if (later < MAXBEATLENGTH)
                partner = last_window[later];
            else
                partner = current_window[later - MAXBEATLENGTH];
            beats[offset - MINBEATLENGTH] += last_window[i] * partner;
        }
    }

    // swap the windows: the one just filled becomes the previous window,
    // and the old previous window is overwritten next
    float *const just_filled = current_window;
    current_window = last_window;
    current_position = last_window;
    last_window = just_filled;
}
// Feed the combined energy of the two lowest mel bands, scaled down by 1e11,
// to the low-frequency beat keeper.
void BeatManager::process(const std::vector<double> &melfreqs)
{
    const double low_band_energy = melfreqs[0] + melfreqs[1];
    lofreq.process(low_band_energy / 1e11); // TODO:why 1e11 here?
}
// Finish beat analysis. In DEBUG builds the low-frequency histogram is
// dumped to /tmp/lofreq; otherwise this does nothing.
void BeatManager::finalize()
{
#if defined(DEBUG)
    lofreq.dump("/tmp/lofreq");
#endif
}
// Return a pointer to the first element of the low-frequency beat histogram.
float *BeatManager::get_result()
{
    return &lofreq.beats[0];
}
// Return the raw bytes of the low-frequency beat histogram (ResultSize bytes,
// in the host's in-memory float representation) as a string.
std::string BeatManager::serialize() const {
    const char *raw = (const char *)lofreq.beats;
    return std::string(raw, ResultSize);
}
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __BEATKEEPER_H
#define __BEATKEEPER_H
#include <string>
#include <vector>
#include "analyzer.h"
// Tempo range covered by the beat analysis, in beats per minute.
#define MINBPM 50
#define MAXBPM 250
// Beat period, in analysis windows, at the fastest and at the slowest tempo.
#define MINBEATLENGTH (WINPERSEC*60/MAXBPM)
#define MAXBEATLENGTH (WINPERSEC*60/MINBPM)
// Number of beat periods tracked, i.e. the length of the beat histogram.
#define BEATSSIZE (MAXBEATLENGTH-MINBEATLENGTH)
// Convert a histogram offset (0 corresponds to MINBEATLENGTH) to the tempo in
// BPM it stands for. The argument is parenthesized so that any expression
// may be passed, not only ones that happen to bind tighter than '+'.
#define OFFSET2BPM(offset) \
    ROUND(60 * WINPERSEC / (float)(MINBEATLENGTH + (offset)))
// Gather info on beat distribution.
//
// Power values are collected into fixed-size windows; every time a window is
// full, the signal's auto-correlation is accumulated into a histogram with
// one slot per beat period between MINBEATLENGTH and MAXBEATLENGTH.
class BeatKeeper
{
    friend class BeatManager;

public:
    // Starts out in the state established by reset().
    BeatKeeper() { reset(); }

    // Clear all collected samples and the histogram.
    void reset();
    // Add one power value; processes the window once it is full.
    void process(float power);
    // Write the histogram to a file for debugging.
    void dump(const std::string &filename);

    const BeatKeeper &operator +=(const BeatKeeper &other);

    // Turn a BEATSSIZE-element histogram into a feature vector.
    static bool extract_features(float *beats, std::vector<float> &features);

protected:
    // Fold the window that was just filled into the histogram.
    void process_window();

    unsigned long samples;
    float average_with;
    // The two halves of data: the previous window and the one being filled.
    float *last_window;
    float *current_window;
    // Next write position inside current_window.
    float *current_position;
    float data[2*MAXBEATLENGTH];
    // Accumulated auto-correlation, indexed by (beat period - MINBEATLENGTH).
    float beats[BEATSSIZE];
};
// Front end to BeatKeeper: feeds it from mel spectra and hands out the
// collected histogram, either as a pointer or as a serialized byte string.
class BeatManager
{
public:
    // Size in bytes of the data returned by serialize().
    static const int ResultSize = BEATSSIZE * sizeof(float);

    // Feed one frame of mel band energies.
    void process(const std::vector<double> &melfreqs);
    // Finish the analysis (dumps debug data in DEBUG builds).
    void finalize();

    // Raw access to the beat histogram.
    float *get_result();
    // The beat histogram as a string of ResultSize raw bytes.
    std::string serialize() const;

protected:
    BeatKeeper lofreq;
};
#endif
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <iostream>
#include <immsutil.h>
#include "fftprovider.h"
using std::cerr;
using std::endl;
// Load previously saved FFTW wisdom, if there is any.
//
// FFTW keeps imported wisdom in its own global state ("wisdom .. is stored
// in a global, private data structure managed internally by FFTW"), so simply
// constructing this object is enough for later FFTW calls to benefit from it;
// see
// http://www.fftw.org/fftw3_doc/Words-of-Wisdom_002dSaving-Plans.html#Words-of-Wisdom_002dSaving-Plans
//
// shouldexport stays true unless the import succeeded, in which case the
// destructor has nothing new to write back.
FFTWisdom::FFTWisdom() : shouldexport(true)
{
    FILE *saved = fopen(get_imms_root(".fftw_wisdom").c_str(), "r");
    if (!saved)
    {
        cerr << "analyzer: Growing wiser. This may take a while." << endl;
        return;
    }

    const bool imported = fftw_import_wisdom_from_file(saved);
    shouldexport = !imported;
    fclose(saved);
}
// Save the accumulated FFTW wisdom, unless it was successfully imported at
// construction time. A failure to open the wisdom file is reported on
// standard error.
FFTWisdom::~FFTWisdom()
{
    if (shouldexport)
    {
        FILE *target = fopen(get_imms_root(".fftw_wisdom").c_str(), "w");
        if (!target)
        {
            cerr << "analyzer: Could not write to wisdom file!" << endl;
            return;
        }
        fftw_export_wisdom_to_file(target);
        fclose(target);
    }
}
/*
IMMS: Intelligent Multimedia Management System
Copyright (C) 2001-2009 Michael Grigoriev
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __FFTPROVIDER_H
#define __FFTPROVIDER_H
#include <memory>
#include <fftw3.h>
// Import and export FFTW Wisdom files. See FFTW docs at www.fftw.org.
//
// Wisdom is automatically used when you load or generate a Wisdom file
// (see "wisdom .. is stored in a global, private data structure" in the docs).
// Thus analyzer doesn't have to do anything to benefit from wisdom
// other than create an instance of FFTWisdom object,
// which will import/generate wisdom as appropriate.
//
// See Also:
// http://www.fftw.org/doc/Introduction.html
// http://www.fftw.org/doc/Words-of-Wisdom_002dSaving-Plans.html#Words-of-Wisdom_002dSaving-Plans
class FFTWisdom
{