pip install librosa
# Load an audio file with librosa and plot its waveform.
#
# Imports come first: the original called librosa.load() before any import
# had bound the name `librosa`, which raises NameError.
import librosa
import librosa.display
import matplotlib.pyplot as plt

filename = 'path/to/audio_file.wav'

# y is the audio time series, sr its sampling rate.
y, sr = librosa.load(filename)

plt.figure(figsize=(12, 4))
# `waveplot` was removed in librosa 0.10; `waveshow` is its replacement.
librosa.display.waveshow(y, sr=sr)
plt.show()
A spectrogram is a representation of the audio signal's frequency content over time.
import numpy as np

# STFT settings: FFT size and the hop between successive frames (in samples).
n_fft, hop_length = 2048, 512

# Magnitude of the complex STFT, then converted to dB relative to the peak.
stft = np.abs(librosa.stft(y, hop_length=hop_length, n_fft=n_fft))
spectrogram = librosa.amplitude_to_db(stft, ref=np.max)

# Draw the dB spectrogram with a time x-axis and a log-frequency y-axis.
plt.figure(figsize=(12, 4))
librosa.display.specshow(
    spectrogram,
    sr=sr,
    hop_length=hop_length,
    x_axis='time',
    y_axis='log',
)
plt.show()
Mel-Frequency Cepstral Coefficients (MFCCs) are a set of coefficients that summarize the audio signal's spectral envelope.
# Extract 13 MFCCs from the loaded signal.
mfccs = librosa.feature.mfcc(
    y=y,
    sr=sr,
    n_mfcc=13,
)

# Show the coefficients against a time axis.
plt.figure(figsize=(12, 4))
librosa.display.specshow(
    mfccs,
    sr=sr,
    x_axis='time',
)
plt.show()
A chromagram is a representation of the audio signal's harmonic content.
# Compute chroma features from the STFT of the loaded signal.
chroma = librosa.feature.chroma_stft(
    y=y,
    sr=sr,
)

# Show the chroma features against a time axis.
plt.figure(figsize=(12, 4))
librosa.display.specshow(
    chroma,
    sr=sr,
    x_axis='time',
)
plt.show()
pip install pydub
# Basic pydub operations: load, play, convert, measure, slice, inspect, and
# change the volume of an audio file.
from pydub import AudioSegment
from pydub.playback import play
from pydub.utils import mediainfo

mp3_path = "audio_file.mp3"
audio = AudioSegment.from_file(mp3_path)

# AudioSegment has no .play() method; playback lives in pydub.playback.
play(audio)

# Convert to WAV.
audio.export("converted_audio_file.wav", format="wav")

# len() of an AudioSegment is its duration in milliseconds.
length_in_ms = len(audio)

# Slicing is in milliseconds too: keep the span from 5 s to 15 s.
trimmed_audio = audio[5000:15000]

# AudioSegment has no .info attribute; file metadata comes from
# pydub.utils.mediainfo(), which takes the file path.
info = mediainfo(mp3_path)

# Increase the volume by 10 dB
increased_volume_audio = audio + 10
# Decrease the volume by 10 dB
decreased_volume_audio = audio - 10
pip install PyAudioAnalysis
# pyAudioAnalysis examples: read a file, extract short-term features, train a
# classifier, run speaker diarization, and estimate tempo.
#
# The importable package is `pyAudioAnalysis` (lower-case "p"); the original
# `PyAudioAnalysis` spelling fails to import. Function names follow the
# current snake_case API.
# NOTE(review): names and return values differ between pyAudioAnalysis
# releases - confirm against the installed version.
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures
from pyAudioAnalysis import MidTermFeatures
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS

# Read the sampling rate and the raw samples.
[Fs, x] = audioBasicIO.read_audio_file("path/to/file.wav")

# Short-term features with a 50 ms window and a 25 ms step, both expressed in
# samples (hence the multiplication by Fs).
F, f_names = ShortTermFeatures.feature_extraction(x, Fs, int(0.050 * Fs), int(0.025 * Fs))

# Train an SVM from one folder of audio files per class. List every class
# folder explicitly: the original `...` placeholder would have been passed to
# the library as a directory.
aT.extract_features_and_train(["class1_folder", "class2_folder"], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svm_model", False)

# Diarize assuming 4 speakers. The function takes no output directory; the
# original third positional argument would have been read as a window size.
# NOTE(review): the exact return value is version-dependent - confirm.
segments = aS.speaker_diarization("path/to/file.wav", 4)

# Beat extraction works on the short-term feature matrix, not on a file path,
# and returns two values (tempo and a confidence ratio). The second argument
# is the short-term step in seconds, matching the 25 ms step used above.
bpm, beats_confidence = MidTermFeatures.beat_extraction(F, 0.025)
pip install madmom
# madmom examples: load a signal, measure its RMS level, and high-pass it.
import madmom
from scipy.signal import butter, sosfilt

# Load the file once; the original re-imported madmom and reloaded the same
# file before every step.
audio_file = madmom.audio.signal.Signal('path/to/audio_file.wav')

# madmom has no `audio.feature_extraction.rms`; the RMS helper lives in
# madmom.audio.signal.
rms = madmom.audio.signal.root_mean_square(audio_file)

# madmom has no `audio.filtering.highpass_filter`, so the filter is built with
# SciPy: a 4th-order Butterworth high-pass applied along the time axis.
# The cutoff is an example value - adjust it for the material at hand.
highpass_cutoff_hz = 100.0
sos = butter(4, highpass_cutoff_hz, btype='highpass', fs=audio_file.sample_rate, output='sos')
filtered_audio = sosfilt(sos, audio_file, axis=0)