-
-
Save davidglavas/c33a9eb5bec736e47438ec546f629520 to your computer and use it in GitHub Desktop.
Feature extraction section for the simple audio classifier blog post.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
import librosa | |
import numpy as np | |
import pandas as pd | |
from scipy.stats import kurtosis, skew | |
# Length of one feature vector: n_mfcc (20 coefficients) times the number of
# summary statistics (7: min, max, median, mean, variance, skewness, kurtosis).
featureVectorLength = 20 * 7
def extract_features_from_file(file_name):
    """
    Loads an audio file and summarises its MFCC matrix coefficient by coefficient.

    :param file_name: Path of the audio file; it is handed to ``librosa.load``.
    :return: A 7-tuple of arrays in the order (min, max, median, mean, variance,
             skewness, kurtosis). Each array is the corresponding statistic taken
             along axis 1 of the MFCC matrix.
    """
    signal, rate = librosa.load(file_name)

    # MFCC matrix: one row per extracted coefficient, one column per frame.
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)

    # Every reducer collapses the frame axis, leaving one value per coefficient.
    reducers = (np.min, np.max, np.median, np.mean, np.var, skew, kurtosis)
    return tuple(reduce_rows(coefficients, axis=1) for reduce_rows in reducers)
def extract_features_from_directories(parent_dir, sub_dirs, file_ext="*.wav"):
    """
    Processes all the files in the subdirectories located in the parent directory. For each file in the
    subdirectories with the correct extension, it extracts the feature vector and stores it into the
    feature matrix. The labels are inferred from the file names: the label is the second '-'-separated
    field of the file's base name (e.g. ``24074-1-0-3.wav`` -> ``1``).

    Files whose feature extraction raises are reported on stdout and skipped.

    :param parent_dir: Parent directory that contains the sub directories.
    :param sub_dirs: Subdirectories whose files will be processed.
    :param file_ext: Glob pattern (file extension) of the files which will be processed.
    :return: A pair. The feature matrix (each row is a feature vector, each column a feature), and the
             corresponding integer labels for each row of the matrix.
    """
    # Collect rows in plain lists and stack once at the end; growing an ndarray
    # with vstack/append inside the loop copies the whole array on every file.
    feature_rows, labels = [], []

    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                summaries = extract_features_from_file(fn)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run,
                # but say why it was skipped.
                print("Error while processing file: ", fn, repr(e))
                continue
            print("Finished processing file: ", fn)

            # os.path.basename works with both '\\' and '/' separators and does
            # not depend on how many components parent_dir has.
            label = int(os.path.basename(fn).split('-')[1])

            # concatenate the per-statistic summaries into one feature vector
            feature_rows.append(np.hstack(summaries))
            labels.append(label)

    # The empty (0, featureVectorLength) seed keeps the result 2-D and float64
    # even when no file was processed, and makes vstack reject rows of the wrong length.
    feature_matrix = np.vstack([np.empty((0, featureVectorLength)), *feature_rows])

    # np.int was removed in NumPy 1.24; the builtin int selects the same dtype.
    return feature_matrix, np.array(labels, dtype=int)
def prepare_features(training_dirs, validation_dirs, training_name, validation_name):
    """
    Extracts and stores features and labels as a pandas dataframe and series, respectively.

    The recordings are read from the ``Sound-Data`` directory and the results are pickled into the
    ``Extracted_Features`` directory (created if it does not exist) as ``<name>_features.pkl`` and
    ``<name>_labels.pkl``.

    :param training_dirs: Subdirectories of ``Sound-Data`` used for training.
    :param validation_dirs: Subdirectories of ``Sound-Data`` used for validation.
    :param training_name: File-name prefix for the stored training data.
    :param validation_name: File-name prefix for the stored validation data.
    :return: Nothing. Just stores the features and labels as files.
    """
    parent_dir = 'Sound-Data'  # name of the directory which contains the recordings
    output_dir = 'Extracted_Features'  # name of the directory which receives the pickles

    # ndarrays
    training_features, training_labels = extract_features_from_directories(parent_dir, training_dirs)
    validation_features, validation_labels = extract_features_from_directories(parent_dir, validation_dirs)

    # Feature columns are numbered 1..featureVectorLength.
    column_names = list(range(1, featureVectorLength + 1))

    # to_pickle does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    datasets = (
        (training_name, training_features, training_labels),
        (validation_name, validation_features, validation_labels),
    )
    for name, features, labels in datasets:
        # ndarray -> dataframe (features) and ndarray -> series (labels)
        examples = pd.DataFrame(features, columns=column_names)
        targets = pd.Series(labels.tolist())

        # os.path.join instead of a hard-coded '\\' so the files land inside
        # output_dir on every platform.
        examples.to_pickle(os.path.join(output_dir, name + '_features.pkl'))
        targets.to_pickle(os.path.join(output_dir, name + '_labels.pkl'))
# Folds 1 through 9 are used for training; fold 10 is held out for validation.
training_dirs = ["fold{}".format(fold) for fold in range(1, 10)]
validation_dirs = ["fold10"]

# prepare_features(training_dirs, validation_dirs, 'notFold10', 'fold10')

# Read the stored features and labels:
# print(pd.read_pickle('Extracted_Features\\fold10_features.pkl'))
# print(pd.read_pickle('Extracted_Features\\fold10_labels.pkl'))
def visualize_data(file_name):
    """
    Walks through the feature extraction for one audio file and prints every intermediate result.

    Prints the raw signal, the MFCC matrix, each of the seven row-wise summaries and the final
    concatenated feature vector, each followed by its shape.

    :param file_name: Path of the audio file; it is handed to ``librosa.load``.
    :return: Nothing. Everything is written to stdout.
    """
    # First we load the audio file.
    raw_sound, sample_rate = librosa.load(file_name)
    print("raw_sound:", raw_sound)
    print("raw_sound.shape:", raw_sound.shape)
    print("\n")

    # compute the MFCC matrix
    mfccs = librosa.feature.mfcc(y=raw_sound, sr=sample_rate, n_mfcc=20)

    # Each summary statistic condenses the MFCC matrix row by row in its own way.
    summaries = [
        ("mfccs_min", np.min(mfccs, axis=1)),
        ("mfccs_max", np.max(mfccs, axis=1)),
        ("mfccs_median", np.median(mfccs, axis=1)),
        ("mfccs_mean", np.mean(mfccs, axis=1)),
        ("mfccs_variance", np.var(mfccs, axis=1)),
        ("mfccs_skeweness", skew(mfccs, axis=1)),
        ("mfccs_kurtosis", kurtosis(mfccs, axis=1)),
    ]

    # The feature vector is the concatenation of the summaries, in the order above.
    finalFeatureVector = np.concatenate([values for _, values in summaries])

    # Report the MFCC matrix first, then every summary: value, shape, blank separator.
    for title, values in [("mfccs", mfccs)] + summaries:
        print(title + ":", values)
        print(title + ".shape:", values.shape)
        print("\n")

    print("finalFeatureVector", finalFeatureVector)
    print("finalFeatureVector.shape:", finalFeatureVector.shape)
# visualize_data("24074-1-0-3.wav") # file must be in the project's root folder |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment