Skip to content

Instantly share code, notes, and snippets.

@faraday
Created May 27, 2011 00:05
Show Gist options
  • Save faraday/994389 to your computer and use it in GitHub Desktop.
Save faraday/994389 to your computer and use it in GitHub Desktop.
Extract audio features with libxtract from MP3 input read using GStreamer
#ifndef MULTI.C
#define MULTI.C
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gst/gst.h>
#include <xtract/libxtract.h>
/* #define BUFSIZE 576 */
#define NAMELEN 300
/*
* Quick and dirty solution to analyze audio data using libxtract and output ARFF files.
* Çağatay Çallı - 2008
*/
/* Analysis frame length in samples. Stays 0 until the first cb_have_data() call,
 * which sets it to arg_framesize when that is non-zero, otherwise to half of the
 * per-buffer frame count. */
int BUFSIZE;
/* Pipeline and its elements, created and linked in initpipe(). */
GstElement *pipeline, *source, *decoder, *conv, *resample, *filter, *sink;
/* timestamp/duration: taken from the buffer currently in cb_have_data();
 * ftime: timestamp handed to extract() for the current sub-window. */
GstClockTime timestamp,duration,ftime;
gfloat *window_left; /* even-indexed samples; allocated with BUFSIZE * 2 elements */
gfloat *window_right; /* odd-indexed samples; allocated with BUFSIZE * 2 elements */
/* Three scratch frames of BUFSIZE elements each per channel, passed to extract(). */
gfloat **window_sleft;
gfloat **window_sright;
/* Set when the matching window has collected BUFSIZE * 2 samples. */
gboolean full_left = FALSE, full_right = FALSE;
/* Next write position inside window_left / window_right. */
gint index_left = 0, index_right = 0;
/* Base name of the file being processed (no directory part). */
gchar filename[NAMELEN];
/* Name of the per-song output file: filename with ".arff" appended. */
gchar arff_filename[NAMELEN];
/* filename wrapped in double quotes, written to the SONG column of mainarff. */
gchar songname[NAMELEN];
/* Per-song ARFF output, opened and closed by processSong(). */
FILE *outarff = 0;
/* Combined ARFF output for all songs, opened and closed by main(). */
FILE *mainarff = 0;
/* Frame size given on the command line; 0 means "derive it from the first buffer". */
int arg_framesize = 0;
/*
 * Bus watch callback: stops the main loop on end-of-stream or on error.
 *
 * bus:  the bus the message arrived on (unused).
 * msg:  the message to inspect.
 * data: the GMainLoop to quit.
 *
 * Always returns TRUE so the watch stays installed.
 */
static gboolean
bus_call (GstBus *bus,
          GstMessage *msg,
          gpointer data)
{
  GMainLoop *loop = (GMainLoop *) data;

  switch (GST_MESSAGE_TYPE (msg)) {
    case GST_MESSAGE_EOS:
      g_main_loop_quit (loop);
      break;
    case GST_MESSAGE_ERROR: {
      gchar *debug = NULL;
      GError *err = NULL;

      gst_message_parse_error (msg, &err, &debug);
      /* g_print() is redirected into the ARFF files by main(); diagnostics
       * must go to stderr so they never end up inside the data section. */
      g_printerr ("Error: %s\n", err != NULL ? err->message : "unknown");
      if (err != NULL) {
        g_error_free (err);
      }
      g_free (debug);
      g_main_loop_quit (loop);
      break;
    }
    default:
      break;
  }
  return TRUE;
}
/*
 * Write the ARFF preamble of a per-song file to ofile: the relation name,
 * one @attribute line per column (TIMESTAMP followed by the feature columns)
 * and the @data marker.
 */
void printArffHeader_song(FILE *ofile){
    static const char preamble[] =
        "@relation 'acoustic'\n\n"
        "@attribute TIMESTAMP date \"HH:mm:ss\"\n"
        /* time-domain statistics */
        "@attribute MEAN real\n"
        "@attribute VARIANCE real\n"
        "@attribute STANDARD_DEVIATION real\n"
        "@attribute AVERAGE_DEVIATION real\n"
        "@attribute SKEWNESS real\n"
        "@attribute KURTOSIS real\n"
        "@attribute ZEROCROSS real\n"
        /* statistics of the magnitude spectrum */
        "@attribute S_MEAN real\n"
        "@attribute S_VARIANCE real\n"
        "@attribute S_STANDARD_DEVIATION real\n"
        "@attribute S_AVERAGE_DEVIATION real\n"
        "@attribute S_SKEWNESS real\n"
        "@attribute S_KURTOSIS real\n"
        "@attribute S_CENTROID real\n"
        "@attribute S_SLOPE real\n"
        /* SPREAD and SHARPNESS columns are disabled */
        "@attribute FLATNESS real\n"
        "@attribute TONALITY real\n"
        "@attribute ROLLOFF real\n"
        /* statistics of the harmonic spectrum */
        "@attribute H_MEAN real\n"
        "@attribute H_VARIANCE real\n"
        "@attribute H_STANDARD_DEVIATION real\n"
        "@attribute H_AVERAGE_DEVIATION real\n"
        "@attribute H_SKEWNESS real\n"
        "@attribute H_KURTOSIS real\n"
        "@attribute H_CENTROID real\n"
        "@attribute H_SLOPE real\n"
        /* H_SPREAD and H_SHARPNESS columns are disabled */
        "@attribute H_FLATNESS real\n"
        "@attribute H_TONALITY real\n"
        "@attribute H_ROLLOFF real\n"
        "@attribute TRIS1 real\n"
        /* TRIS2 and TRIS3 columns are disabled */
        "@attribute F0 real\n"
        "@data\n";

    fputs(preamble,ofile);
}
/*
 * Write the ARFF preamble of the combined file to ofile. Same columns as the
 * per-song preamble, with a leading SONG string attribute.
 */
void printArffHeader_main(FILE *ofile){
    static const char preamble[] =
        "@relation 'acoustic'\n\n"
        "@attribute SONG string\n"
        "@attribute TIMESTAMP date \"HH:mm:ss\"\n"
        /* time-domain statistics */
        "@attribute MEAN real\n"
        "@attribute VARIANCE real\n"
        "@attribute STANDARD_DEVIATION real\n"
        "@attribute AVERAGE_DEVIATION real\n"
        "@attribute SKEWNESS real\n"
        "@attribute KURTOSIS real\n"
        "@attribute ZEROCROSS real\n"
        /* statistics of the magnitude spectrum */
        "@attribute S_MEAN real\n"
        "@attribute S_VARIANCE real\n"
        "@attribute S_STANDARD_DEVIATION real\n"
        "@attribute S_AVERAGE_DEVIATION real\n"
        "@attribute S_SKEWNESS real\n"
        "@attribute S_KURTOSIS real\n"
        "@attribute S_CENTROID real\n"
        "@attribute S_SLOPE real\n"
        /* SPREAD and SHARPNESS columns are disabled */
        "@attribute FLATNESS real\n"
        "@attribute TONALITY real\n"
        "@attribute ROLLOFF real\n"
        /* statistics of the harmonic spectrum */
        "@attribute H_MEAN real\n"
        "@attribute H_VARIANCE real\n"
        "@attribute H_STANDARD_DEVIATION real\n"
        "@attribute H_AVERAGE_DEVIATION real\n"
        "@attribute H_SKEWNESS real\n"
        "@attribute H_KURTOSIS real\n"
        "@attribute H_CENTROID real\n"
        "@attribute H_SLOPE real\n"
        /* H_SPREAD and H_SHARPNESS columns are disabled */
        "@attribute H_FLATNESS real\n"
        "@attribute H_TONALITY real\n"
        "@attribute H_ROLLOFF real\n"
        "@attribute TRIS1 real\n"
        /* TRIS2 and TRIS3 columns are disabled */
        "@attribute F0 real\n"
        "@data\n";

    fputs(preamble,ofile);
}
/*
 * Append one feature value to both ARFF outputs (outarff and mainarff).
 *
 * x:    the value; written with ten decimals, or as "?" when it is NaN or
 *       infinite.
 * last: non-zero for the final column of a row, which ends the row with a
 *       newline; otherwise a comma separator follows the value.
 */
void finiteaux(float x,int last){
    const char *terminator = last ? "\n" : ",";

    if(isfinite(x)){
        fprintf(outarff,"%.10f",x);
        fprintf(mainarff,"%.10f",x);
    }
    else {
        fputs("?",outarff);
        fputs("?",mainarff);
    }
    fputs(terminator,outarff);
    fputs(terminator,mainarff);
}
/* g_print() calls redirected to this */
void printRedirect(const gchar *str){
fprintf(outarff,"%s",str);
fprintf(mainarff,"%s",str);
}
/*
 * Compute the feature row for one analysis frame and append it to both ARFF
 * outputs.
 *
 * data:      BUFSIZE samples of a single channel.
 * frametime: time written to the TIMESTAMP column.
 *
 * Row layout: the quoted song name (combined file only), the timestamp (sent
 * through g_print(), i.e. through the installed print handler), then the
 * feature columns in the order of the @attribute lines of the ARFF header.
 *
 * NOTE(review): `sr` holds 44100 / BUFSIZE, so a 44.1 kHz stream is assumed,
 * and the same value is handed to XTRACT_F0 - confirm against the libxtract
 * documentation whether F0 expects the plain sample rate instead.
 */
void extract(gfloat *data,GstClockTime frametime){
    float mean,var,sdev,adev,skew,kurtosis,zcr;
    float magspectrum[BUFSIZE];
    float peakspec[BUFSIZE],harmonicspec[BUFSIZE];
    float sr = 44100.0/BUFSIZE;
    float magspecargs[] = {sr,XTRACT_MAGNITUDE_SPECTRUM};
    float harmspecargs[2];
    float rolloffargs[] = {sr,0.8};
    float smean,svar,ssdev,sadev,sskew,skurtosis,scentroid,sslope;
    float hmean,hvar,hsdev,hadev,hskew,hkurtosis,hcentroid,hslope;
    float flatness,rolloff,tonality,f0;
    float hflatness,hrolloff,htonality,tris1;
    float mesdev[2],smesdev[2],hmesdev[2];
    size_t col;

    /* --- statistics of the raw samples --- */
    xtract[XTRACT_MEAN](data,BUFSIZE,NULL,&mean);
    xtract[XTRACT_VARIANCE](data,BUFSIZE,&mean,&var);
    xtract[XTRACT_STANDARD_DEVIATION](data,BUFSIZE,&var,&sdev);
    xtract[XTRACT_AVERAGE_DEVIATION](data,BUFSIZE,&mean,&adev);
    mesdev[0] = mean; mesdev[1] = sdev;
    xtract[XTRACT_SKEWNESS](data,BUFSIZE,mesdev,&skew);
    xtract[XTRACT_KURTOSIS](data,BUFSIZE,mesdev,&kurtosis);
    xtract[XTRACT_ZCR](data,BUFSIZE,NULL,&zcr);
    xtract[XTRACT_F0](data,BUFSIZE,&sr,&f0);

    /* --- derived spectra: magnitude -> peaks -> harmonics --- */
    harmspecargs[0] = f0; harmspecargs[1] = 0.8;
    xtract_spectrum(data,BUFSIZE,magspecargs,magspectrum);
    xtract_peak_spectrum(magspectrum,BUFSIZE/2,magspecargs,peakspec);
    xtract_harmonic_spectrum(peakspec,BUFSIZE,harmspecargs,harmonicspec);

    /* --- statistics of the magnitude spectrum --- */
    xtract[XTRACT_SPECTRAL_MEAN](magspectrum,BUFSIZE,NULL,&smean);
    xtract[XTRACT_SPECTRAL_VARIANCE](magspectrum,BUFSIZE,&smean,&svar);
    xtract[XTRACT_SPECTRAL_STANDARD_DEVIATION](magspectrum,BUFSIZE,&svar,&ssdev);
    xtract[XTRACT_SPECTRAL_AVERAGE_DEVIATION](magspectrum,BUFSIZE,&smean,&sadev);
    /* Fix: the spectral mean used to be stored in mesdev[0], which left
     * smesdev[0] uninitialised for the skewness/kurtosis calls below. */
    smesdev[0] = smean; smesdev[1] = ssdev;
    xtract[XTRACT_SPECTRAL_SKEWNESS](magspectrum,BUFSIZE,smesdev,&sskew);
    xtract[XTRACT_SPECTRAL_KURTOSIS](magspectrum,BUFSIZE,smesdev,&skurtosis);
    xtract[XTRACT_SPECTRAL_CENTROID](magspectrum,BUFSIZE,NULL,&scentroid);
    xtract[XTRACT_SPECTRAL_SLOPE](magspectrum,BUFSIZE,&svar,&sslope);
    xtract[XTRACT_FLATNESS](magspectrum,BUFSIZE/2,NULL,&flatness);
    xtract[XTRACT_TONALITY](NULL,0,&flatness,&tonality);
    xtract[XTRACT_ROLLOFF](magspectrum,BUFSIZE/2,rolloffargs,&rolloff);

    /* --- statistics of the harmonic spectrum --- */
    xtract[XTRACT_SPECTRAL_MEAN](harmonicspec,BUFSIZE,NULL,&hmean);
    xtract[XTRACT_SPECTRAL_VARIANCE](harmonicspec,BUFSIZE,&hmean,&hvar);
    xtract[XTRACT_SPECTRAL_STANDARD_DEVIATION](harmonicspec,BUFSIZE,&hvar,&hsdev);
    xtract[XTRACT_SPECTRAL_AVERAGE_DEVIATION](harmonicspec,BUFSIZE,&hmean,&hadev);
    hmesdev[0] = hmean; hmesdev[1] = hsdev;
    xtract[XTRACT_SPECTRAL_SKEWNESS](harmonicspec,BUFSIZE,hmesdev,&hskew);
    xtract[XTRACT_SPECTRAL_KURTOSIS](harmonicspec,BUFSIZE,hmesdev,&hkurtosis);
    xtract[XTRACT_SPECTRAL_CENTROID](harmonicspec,BUFSIZE,NULL,&hcentroid);
    xtract[XTRACT_SPECTRAL_SLOPE](harmonicspec,BUFSIZE,&hvar,&hslope);
    xtract[XTRACT_FLATNESS](harmonicspec,BUFSIZE/2,NULL,&hflatness);
    xtract[XTRACT_TONALITY](NULL,0,&hflatness,&htonality);
    xtract[XTRACT_ROLLOFF](harmonicspec,BUFSIZE/2,rolloffargs,&hrolloff);
    /* Only TRIS1 is a column of the ARFF header; TRISTIMULUS_2/3 were computed
     * but never written, so they are no longer calculated. */
    xtract[XTRACT_TRISTIMULUS_1](harmonicspec,BUFSIZE/2,NULL,&tris1);

    /* --- emit the row --- */
    fprintf(mainarff,"%s,",songname);
    g_print("%" GST_TIME_FORMAT ",",GST_TIME_ARGS(frametime));
    {
        /* Same order as the @attribute lines of the ARFF header. */
        const float row[] = {
            mean,var,sdev,adev,skew,kurtosis,zcr,
            smean,svar,ssdev,sadev,sskew,skurtosis,scentroid,sslope,
            flatness,tonality,rolloff,
            hmean,hvar,hsdev,hadev,hskew,hkurtosis,hcentroid,hslope,
            hflatness,htonality,hrolloff,
            tris1,f0
        };
        const size_t ncols = sizeof row / sizeof row[0];

        for(col=0;col<ncols;col++){
            finiteaux(row[col],col+1 == ncols);
        }
    }
}
static gboolean
cb_have_data (GstPad *pad,
GstBuffer *buffer,
gpointer u_data)
{
gint i,j,end;
/* GstClockTime timestamp = GST_BUFFER_TIMESTAMP(buffer), duration = GST_BUFFER_DURATION(buffer); */
timestamp = GST_BUFFER_TIMESTAMP(buffer);
duration = GST_BUFFER_DURATION(buffer);
/* gfloat *data = (gfloat *) GST_BUFFER_DATA (buffer); */
gint size = GST_BUFFER_SIZE(buffer) / (sizeof(gfloat) * 2);
gfloat *data = (gfloat *) GST_BUFFER_DATA (buffer);
/* g_print("size: %d\n",size); */
/* set BUFSIZE just once, with first cb_have_data() */
if(!BUFSIZE) {
if(arg_framesize == 0){
BUFSIZE = size/2;
}
else {
BUFSIZE = arg_framesize;
}
window_left = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE * 2);
window_right = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE * 2);
window_sleft = (gfloat **) malloc(sizeof(gfloat *) * 3);
window_sright = (gfloat **) malloc(sizeof(gfloat *) * 3);
window_sleft[0] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
window_sleft[1] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
window_sleft[2] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
window_sright[0] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
window_sright[1] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
window_sright[2] = (gfloat *) malloc(sizeof(gfloat) * BUFSIZE);
}
for(i=0;i<size;i++){
/* DEBUG */
/* g_print("%" GST_TIME_FORMAT ",%f\n",GST_TIME_ARGS(timestamp+(duration*2/size)*(i/2)),data[i]); */
if(i%2==0){ /* left channel */
if(!full_left){
window_left[index_left++] = data[i];
if(index_left >= BUFSIZE*2){
full_left = TRUE;
}
}
}
else { /* right channel */
if(!full_right){
window_right[index_right++] = data[i];
if(index_right >= BUFSIZE*2){
full_right = TRUE;
}
}
}
}
if(full_left){
/* g_print("LEFT "); */
for(i=0;i<3;i++){
switch(i){
case 0: j = 0; end = BUFSIZE; ftime = timestamp + (duration/(size*4)); break;
case 1: j = BUFSIZE/4; end = 3*BUFSIZE/4; ftime = timestamp + (duration/(size*2)); break;
case 2: j = BUFSIZE/2; end = BUFSIZE; ftime = timestamp + (3*duration/(size*4)); break;
}
for(;j<end;j++){
window_sleft[i][j] = window_left[j];
}
extract(window_sleft[i],ftime);
}
index_left = 0;
full_left = FALSE;
}
if(full_right){
/* g_print("RIGHT "); */
for(i=0;i<3;i++){
switch(i){
case 0: j = 0; end = BUFSIZE; ftime = timestamp + (duration/(size*4)); break;
case 1: j = BUFSIZE/4; end = 3*BUFSIZE/4; ftime = timestamp + (duration/(size*2)); break;
case 2: j = BUFSIZE/2; end = BUFSIZE; ftime = timestamp + (3*duration/(size*4)); break;
}
for(;j<end;j++){
window_sright[i][j] = window_right[j];
}
extract(window_sright[i],ftime);
}
index_right = 0;
full_right = FALSE;
}
return TRUE;
}
/*
 * Build the decoding pipeline
 *   filesrc -> mad -> audioconvert -> audioresample -> capsfilter -> fakesink
 * and hook the analysis into it: a buffer probe (cb_have_data) on the
 * resampler's src pad and a bus watch (bus_call) that quits `loop`.
 *
 * loop: main loop handed to the bus watch.
 *
 * Returns 0 on success, -1 when an element cannot be created or the elements
 * cannot be linked. The file location is not set here; processSong() does
 * that per song.
 */
int initpipe(GMainLoop *loop){
    GstBus *bus;
    GstCaps *caps;
    GstPad *pad;

    /* create elements */
    pipeline = gst_pipeline_new ("audio-player");
    source = gst_element_factory_make ("filesrc", "file-source");
    decoder = gst_element_factory_make ("mad","decoder");
    conv = gst_element_factory_make ("audioconvert", "converter");
    resample = gst_element_factory_make ("audioresample","resampler");
    filter = gst_element_factory_make ("capsfilter","filter");
    sink = gst_element_factory_make ("fakesink","fake-output");
    if (!pipeline || !source || !decoder || !conv || !resample || !filter || !sink) {
        /* stderr, not g_print(): the print handler writes into ARFF files
         * that are not open yet at this point. */
        g_printerr ("One element could not be created\n");
        return -1;
    }

    /* force stereo 32-bit float samples at the capsfilter */
    caps = gst_caps_from_string ("audio/x-raw-float,channels=2,width=32");
    g_object_set (G_OBJECT (filter),"caps",caps,NULL);
    /* the property setter keeps its own reference */
    gst_caps_unref (caps);

    pad = gst_element_get_pad (resample,"src");
    gst_pad_add_buffer_probe (pad,G_CALLBACK (cb_have_data), NULL);
    gst_object_unref (pad);

    bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
    gst_bus_add_watch (bus, bus_call, loop);
    gst_object_unref (bus);

    /* put all elements in a bin and link them in order */
    gst_bin_add_many (GST_BIN (pipeline),
                      source, decoder, conv, resample, filter, sink, NULL);
    if (!gst_element_link_many (source, decoder, conv, resample, filter, sink, NULL)) {
        g_printerr ("Elements could not be linked\n");
        return -1;
    }
    return 0;
}
/*
 * Analyse one song: if the global `filename` ends in ".mp3" or ".MP3", create
 * "<filename>.arff", play `pathname` through the pipeline until the main loop
 * is quit (end-of-stream or error), then close the per-song file. Files with
 * any other extension are ignored.
 *
 * pathname: location given to the file source.
 * loop:     main loop to run while the pipeline is playing.
 *
 * The output name is built from `filename` only, so the per-song file is
 * created relative to the current working directory.
 */
void processSong(char *pathname,GMainLoop *loop){
    char *lastdot;
    int len;

    /* check if file has mp3 extension */
    lastdot = strrchr(filename,'.');
    if(lastdot == NULL || (strcmp(lastdot,".mp3") && strcmp(lastdot,".MP3"))){
        return;
    }
    printf("processing: %s\n",filename);

    /* open a new file for writing arff output */
    len = snprintf(arff_filename,sizeof arff_filename,"%s.arff",filename);
    if(len < 0 || (size_t)len >= sizeof arff_filename){
        fprintf(stderr,"skipping %s: output file name too long\n",filename);
        return;
    }
    outarff = fopen(arff_filename,"w");
    if(outarff == NULL){
        perror(arff_filename);
        return;
    }
    printArffHeader_song(outarff);

    /* Start every song with empty windows so samples left over from the
     * previous song cannot leak into this song's first rows. */
    index_left = index_right = 0;
    full_left = full_right = FALSE;

    /* set the new file location, then play until the loop is quit */
    gst_element_set_state (pipeline, GST_STATE_NULL);
    g_object_set (G_OBJECT (source), "location", pathname, NULL);
    gst_element_set_state (pipeline, GST_STATE_PLAYING);
    g_main_loop_run (loop);

    /* Stop streaming before the file is closed so that no late probe
     * callback writes to a closed stream. */
    gst_element_set_state (pipeline, GST_STATE_NULL);
    fclose(outarff);
    outarff = NULL;
}
/*
 * Walk the directory `pathname` recursively and hand every regular file to
 * processSong().
 *
 * pathname: directory to scan.
 * loop:     main loop forwarded to processSong().
 *
 * For each regular file the globals `filename` (entry name) and `songname`
 * (entry name in double quotes) are updated before processSong() is called.
 * Entries that cannot be stat'ed or whose path does not fit the path buffer
 * are reported on stderr and skipped.
 */
void process(char *pathname,GMainLoop *loop){
    struct stat sb;
    DIR *dirp;
    struct dirent *dp;
    char pname[1000];
    int len;

    dirp = opendir(pathname);
    if(dirp == NULL){
        perror(pathname);
        return;
    }
    while((dp = readdir(dirp)) != NULL){
        /* never recurse into the directory itself or its parent */
        if(!strcmp(dp->d_name,".") || !strcmp(dp->d_name,"..")){
            continue;
        }
        len = snprintf(pname,sizeof pname,"%s/%s",pathname,dp->d_name);
        if(len < 0 || (size_t)len >= sizeof pname){
            fprintf(stderr,"skipping %s/%s: path too long\n",pathname,dp->d_name);
            continue;
        }
        if(stat(pname,&sb) != 0){
            perror(pname);
            continue;
        }
        if(S_ISDIR(sb.st_mode)){
            process(pname,loop);
        }
        else if(S_ISREG(sb.st_mode)){
            snprintf(filename,sizeof filename,"%s",dp->d_name);
            /* leave room for both quotes and the terminator */
            snprintf(songname,sizeof songname,"\"%.*s\"",NAMELEN - 3,filename);
            processSong(pname,loop);
        }
    }
    /* Always release the handle; closing used to depend on a stale errno. */
    closedir(dirp);
}
int
main (int argc,
char *argv[])
{
char* lastslash;
struct stat sb;
GMainLoop *mloop;
/* initialize GStreamer */
gst_init (&argc, &argv);
mloop = g_main_loop_new (NULL, FALSE);
g_set_print_handler(printRedirect);
/* check input arguments */
if (argc < 2) {
g_print ("Usage: %s <MP3 folder name> (optional: <frame size>)\n", argv[0]);
return -1;
}
else if(argc == 3){
sscanf(argv[2],"%d",&arg_framesize);
}
initpipe(mloop);
mainarff = fopen("0_ALL_ACOUSTIC_FEATURES.arff","w");
printArffHeader_main(mainarff);
stat(argv[1],&sb);
if(S_ISDIR(sb.st_mode)){
process(argv[1],mloop);
}
else if(S_ISREG(sb.st_mode)){
/* do some file processing */
/* printf("file: %s\n",dp->d_name); */
lastslash = strrchr(argv[1],'/');
sprintf(filename,"%s",lastslash+1);
sprintf(songname,"\"%s\"",filename);
processSong(argv[1],mloop);
}
/* process(argv[1],mloop); */
/* clean up nicely */
/* g_print ("Returned, stopping playback\n"); */
gst_element_set_state (pipeline, GST_STATE_NULL);
/* g_print ("Deleting pipeline\n"); */
gst_object_unref (GST_OBJECT (pipeline));
fclose(mainarff);
return 0;
}
#endif // MULTI.C
@jvegalo
Copy link

jvegalo commented Apr 30, 2012

Your code is so good, congrats. I have a question, How do i decide the frame size?

@faraday
Copy link
Author

faraday commented Apr 30, 2012

Thank you. About your question, the frame size is selected as

gint size = GST_BUFFER_SIZE(buffer) / (sizeof(gfloat) * 2);
BUFSIZE = size/2;

by default.

However, this can be changed according to what you want to focus on. You may want to compute acoustic features for a smaller or larger frame. My actual usage of this code included testing the effects of different frame sizes on the resulting acoustic features.

@jvegalo
Copy link

jvegalo commented May 2, 2012

Thank you very much, faraday. I have another question: how can I correct this error: "libxtract: Error: xtract_spectrum() has uninitialised plan"? Sorry if the question is stupid (I am a newbie in audio XD). Greetings

@jvegalo
Copy link

jvegalo commented May 5, 2012

Maybe the error: "xtract_spectrum() has uninitialised plan" is because XTRACT_MAGNITUDE_SPECTRUM is not defined, if i am wrong let me know please, thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment