Skip to content

Instantly share code, notes, and snippets.

@davidglavas
Created May 21, 2018 12:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidglavas/c33a9eb5bec736e47438ec546f629520 to your computer and use it in GitHub Desktop.
Save davidglavas/c33a9eb5bec736e47438ec546f629520 to your computer and use it in GitHub Desktop.
Feature extraction section for simple audio classifier blogpost
import glob
import os
import librosa
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
# Each feature vector holds 7 summary statistics for each of the 20 MFCCs.
featureVectorLength = 20 * 7  # n_mfcc * number_of_summary_statistics
def extract_features_from_file(file_name):
    """
    Loads an audio file and summarizes its MFCC matrix with seven statistics.
    :param file_name: Path of the audio file, handed to ``librosa.load``.
    :return: A 7-tuple (min, max, median, mean, variance, skewness, kurtosis). Every entry is computed
    along axis 1 of the MFCC matrix that is requested with ``n_mfcc=20``.
    """
    signal, rate = librosa.load(file_name)
    # one row per extracted coefficient, one column per frame
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)
    # The order of the summarizers fixes the order of the returned tuple.
    summarizers = (np.min, np.max, np.median, np.mean, np.var, skew, kurtosis)
    return tuple(summarize(coefficients, axis=1) for summarize in summarizers)
def extract_features_from_directories(parent_dir, sub_dirs, file_ext="*.wav"):
    """
    Processes all the files in the subdirectories located in the parent directory. For each file in the
    subdirectories that matches the extension pattern, it extracts the feature vector and stores it as a row
    of the feature matrix. The label of a file is the second '-'-separated field of its base name
    (e.g. '24074-1-0-3.wav' has label 1).
    :param parent_dir: Parent directory that contains the subdirectories.
    :param sub_dirs: Subdirectories whose files will be processed.
    :param file_ext: Glob pattern selecting the files which will be processed.
    :return: A pair. The feature matrix (each row is a feature vector, each column a feature) with
    featureVectorLength columns, and the integer labels corresponding to the rows of the matrix.
    :raises IndexError: If the base name of a processed file contains no '-'.
    :raises ValueError: If a label is not an integer, or a feature vector does not have
    featureVectorLength entries.
    """
    feature_rows, labels = [], []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                summaries = extract_features_from_file(fn)
            except Exception as e:
                # Best effort: one unreadable recording must not abort the whole run, but report why it
                # was skipped instead of discarding the exception.
                print("Error while processing file: ", fn, "-", e)
                continue
            print("Finished processing file: ", fn)
            # concatenate the extracted summaries into a single feature vector
            feature_rows.append(np.hstack(summaries))
            # Take the label from the base name so that it works with any path separator and any
            # nesting depth of parent_dir.
            labels.append(int(os.path.basename(fn).split('-')[1]))
    # Stack once at the end instead of copying the matrix for every file. The empty float64 seed keeps
    # the (0, featureVectorLength) result for empty input and enforces the row length.
    feature_matrix = np.vstack([np.empty((0, featureVectorLength))] + feature_rows)
    # np.int was removed in NumPy 1.24; the builtin int selects the same dtype.
    return feature_matrix, np.array(labels, dtype=int)
def prepare_features(training_dirs, validation_dirs, training_name, validation_name,
                     parent_dir='Sound-Data', output_dir='Extracted_Features'):
    """
    Extracts features and labels and stores them as a pickled pandas dataframe and series, respectively.
    :param training_dirs: Subdirectories of parent_dir whose files form the training set.
    :param validation_dirs: Subdirectories of parent_dir whose files form the validation set.
    :param training_name: Prefix of the two files written for the training set.
    :param validation_name: Prefix of the two files written for the validation set.
    :param parent_dir: Name of the directory which contains the recordings.
    :param output_dir: Directory that receives the pickle files. It is created if it does not exist.
    :return: Nothing. Just stores the features and labels as '<name>_features.pkl' and '<name>_labels.pkl'.
    """
    # ndarrays
    training_features, training_labels = extract_features_from_directories(parent_dir, training_dirs)
    validation_features, validation_labels = extract_features_from_directories(parent_dir, validation_dirs)

    # Feature columns are numbered starting at 1.
    column_names = list(range(1, featureVectorLength + 1))

    # convert ndarrays to pandas dataframes (features) and series (labels)
    training_examples = pd.DataFrame(training_features, columns=column_names)
    training_labels = pd.Series(training_labels.tolist())
    validation_examples = pd.DataFrame(validation_features, columns=column_names)
    validation_labels = pd.Series(validation_labels.tolist())

    # Build the paths with os.path.join: a hard-coded '\\' is only a separator on Windows, elsewhere it
    # would become part of the file name.
    os.makedirs(output_dir, exist_ok=True)

    # store extracted training data
    training_examples.to_pickle(os.path.join(output_dir, training_name + '_features.pkl'))
    training_labels.to_pickle(os.path.join(output_dir, training_name + '_labels.pkl'))

    # store extracted validation data
    validation_examples.to_pickle(os.path.join(output_dir, validation_name + '_features.pkl'))
    validation_labels.to_pickle(os.path.join(output_dir, validation_name + '_labels.pkl'))
# Folds 1 through 9 make up the training set; fold 10 is held out for validation.
training_dirs = ["fold" + str(fold_number) for fold_number in range(1, 10)]
validation_dirs = ["fold10"]

# Uncomment to run the extraction:
# prepare_features(training_dirs, validation_dirs, 'notFold10', 'fold10')

# Read the stored features and labels:
# print(pd.read_pickle('Extracted_Features\\fold10_features.pkl'))
# print(pd.read_pickle('Extracted_Features\\fold10_labels.pkl'))
def visualize_data(file_name):
    """
    Walks through the feature extraction for a single audio file and prints every intermediate result:
    the raw samples, the MFCC matrix, each of the seven summaries, and the final feature vector.
    :param file_name: Path of the audio file, handed to ``librosa.load``.
    :return: Nothing. The values and their shapes are only printed.
    """
    # Load the audio file.
    signal, rate = librosa.load(file_name)  # file must be in the root folder of your project
    print("raw_sound:", signal)
    print("raw_sound.shape:", signal.shape)
    print("\n")

    mfccs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)  # compute the MFCC matrix

    # Each summary condenses the MFCC matrix along axis 1 in its own way. The titles are the
    # labels that get printed, so they are kept exactly as they are.
    summaries = [
        ("mfccs_min", np.min(mfccs, axis=1)),
        ("mfccs_max", np.max(mfccs, axis=1)),
        ("mfccs_median", np.median(mfccs, axis=1)),
        ("mfccs_mean", np.mean(mfccs, axis=1)),
        ("mfccs_variance", np.var(mfccs, axis=1)),
        ("mfccs_skeweness", skew(mfccs, axis=1)),
        ("mfccs_kurtosis", kurtosis(mfccs, axis=1)),
    ]

    # The feature vector is the concatenation of the summaries in the order above.
    finalFeatureVector = np.concatenate([values for _, values in summaries])

    # Print the MFCC matrix first, then every summary, each followed by its shape.
    for title, values in [("mfccs", mfccs)] + summaries:
        print(title + ":", values)
        print(title + ".shape:", values.shape)
        print("\n")

    print("finalFeatureVector", finalFeatureVector)
    print("finalFeatureVector.shape:", finalFeatureVector.shape)
# visualize_data("24074-1-0-3.wav") # file must be in the project's root folder
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment