This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the first second of audio
import essentia
import essentia.standard

# Sample rate (Hz) requested from the loader. One second of audio is exactly
# this many samples, so the slice below reuses it instead of repeating 44100.
sample_rate = 44100

loader = essentia.standard.MonoLoader(filename='example.wav', sampleRate=sample_rate)
audio = loader()
# Slicing never raises on short input: a file shorter than one second simply
# yields fewer than `sample_rate` samples.
sec_audio = audio[0:sample_rate]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the first second of audio
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Only y-values are passed, so the x-axis is the sample index.
plt.plot(sec_audio)
plt.title('Sound Wave')
plt.xlabel('Samples')
plt.ylabel('Amplitude')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# 25-point window produced by np.hanning, plotted against its sample index.
window = np.hanning(25)
plt.plot(window)
plt.title('Hanning Window')
# NOTE(review): the x-axis is the sample index (0-24); it only reads as
# milliseconds if each sample is taken to span 1 ms -- confirm the label.
plt.xlabel('Time (ms)')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define function for computing mels
def mel_formula(f):
    """Convert a frequency in Hz to mels.

    Computes ``2595 * log10(1 + f / 700)``. The logarithm must be base 10:
    the 2595 constant belongs to the base-10 form, and only then is this the
    exact inverse of ``inverse_mel_formula``, which raises 10 to ``m / 2595``.

    Args:
        f: Frequency in Hz, as a scalar or a NumPy array (applied elementwise).

    Returns:
        The corresponding mel value(s), with the same shape as ``f``.
    """
    return 2595 * np.log10(1 + f / 700)
# Frequency range
f_low = 40
f_high = 44100 / 2  # Nyquist frequency
# np.linspace defaults to 50 evenly spaced points, both endpoints included.
f = np.linspace(f_low, f_high)
# Plot relationship between frequency and mels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def inverse_mel_formula(m):
    """Convert a mel value back to a frequency in Hz.

    Computes ``700 * (10 ** (m / 2595) - 1)``.

    Args:
        m: Mel value, as a scalar or a NumPy array (applied elementwise).

    Returns:
        The corresponding frequency value(s) in Hz, with the same shape as ``m``.
    """
    return 700 * (np.power(10, m / 2595) - 1)
# Convert back to frequency
# NOTE(review): `m` is not defined in any snippet visible here; presumably it
# holds the mel values computed in an earlier step -- confirm before running.
f = inverse_mel_formula(m)
print(f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw one triangle per band over three consecutive entries of `f`
# (left edge, peak, right edge). The last band reads f[n_bands-1:n_bands+2],
# so `f` needs n_bands + 2 points for every triangle to be complete.
for n in range(n_bands):
    plt.plot(f[n:n+3], [0, 1, 0])  # Does the "+ 2" make sense now?
plt.title('Mel Filterbanks')
plt.xlabel('Frequency (Hz)')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from essentia.standard import MFCC, FrameGenerator

# Configure the MFCC extractor over the same frequency range and number of
# bands used for the mel filterbank, keeping 13 coefficients.
mfcc = MFCC(highFrequencyBound=f_high,
            lowFrequencyBound=f_low,
            inputSize=1024,
            numberBands=n_bands,
            numberCoefficients=13,
            type='magnitude',
            sampleRate=44100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from essentia.standard import UnaryOperator

# Take the base-10 logarithm of the mel band values.
# This is a plain log10; no decibel scaling factor is applied.
log10 = UnaryOperator(type='log10')
log_mels = log10(mels)
# Plot the mel band powers
plt.bar(np.arange(len(log_mels)), log_mels, align='center')
plt.title('Log of the Mel Band Powers')
plt.xlabel('Mel Bands')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from essentia.standard import MelBands

# Compute the mel band powers
# NOTE(review): `spec` comes from PowerSpectrum(size=1024), whose output is
# presumably shorter than 1024 bins -- confirm that inputSize matches the
# actual spectrum length.
melbands = MelBands(lowFrequencyBound=f_low,
                    highFrequencyBound=f_high,
                    inputSize=1024,
                    numberBands=n_bands,
                    type='magnitude',  # We already computed the power.
                    sampleRate=44100)
mels = melbands(spec)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from essentia.standard import Windowing, PowerSpectrum

# Number of samples per analysis frame; shared by the frame slice and the
# PowerSpectrum size so the two cannot drift apart.
frame_size = 1024

# Compute the spectrum of a frame
w = Windowing(type='hann')
spectrum = PowerSpectrum(size=frame_size)
frame = audio[0:frame_size]
# Window the frame first, then take its power spectrum.
spec = spectrum(w(frame))
# Plot the spectrum of a frame
plt.plot(spec)
OlderNewer