Skip to content

Instantly share code, notes, and snippets.

@mauri870
Last active October 12, 2022 13:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mauri870/4d7f630bd82a4c2f5fb54845064371f3 to your computer and use it in GitHub Desktop.
Save mauri870/4d7f630bd82a4c2f5fb54845064371f3 to your computer and use it in GitHub Desktop.
WAV audio to MFCC features in TensorFlow 1.15
import tensorflow as tf
# FIXME: audio_ops.decode_wav is deprecated, use tensorflow_io.IOTensor.from_audio
from tensorflow.contrib.framework.python.ops import audio_ops
# Enable eager execution for a more interactive frontend, so the result of
# audio_to_mfccs can be printed directly by the script below.
# If using the default graph mode instead, you'll probably need to evaluate
# the returned tensors in a session.
# NOTE(review): this call sits at module level, so it runs whenever the module
# is imported, not only when it is executed as a script — confirm intended.
tf.enable_eager_execution()
@tf.function
def audio_to_mfccs(
        audio_contents,
        channels=1,
        sample_rate=8000,
        frame_length=1024,
        frame_step=256,
        fft_length=1024,
        num_mel_bins=128,
        lower_edge_hertz=80):
    """Convert the raw bytes of a WAV file into MFCC features.

    Args:
        audio_contents: Scalar string tensor holding the encoded WAV file
            (for example the result of ``tf.io.read_file``).
        channels: Number of channels requested from the WAV decoder.
        sample_rate: Sample rate in Hz used to build the mel filterbank.
            NOTE(review): this is NOT read from the WAV header; if the file
            was recorded at a different rate the mel scale will be skewed.
            Must be a Python number.
        frame_length: STFT window length in samples.
        frame_step: STFT hop size in samples.
        fft_length: FFT size; must be a Python int because the number of
            spectrogram bins is derived from it statically.
        num_mel_bins: Number of bands in the mel filterbank.
        lower_edge_hertz: Lowest frequency (Hz) covered by the filterbank.
            The upper edge is fixed at the Nyquist frequency,
            ``sample_rate / 2``.

    Returns:
        A float tensor of MFCCs with the same shape as the log-mel
        spectrogram, i.e. ``[channels, frames, num_mel_bins]``.
    """
    waveform = audio_ops.decode_wav(
        audio_contents, desired_channels=channels)

    # decode_wav yields audio laid out as [samples, channels], while stft
    # frames the innermost axis. The transpose to [channels, samples] is
    # therefore required so that framing happens along time.
    stfts = tf.signal.stft(
        tf.transpose(waveform.audio),
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=fft_length)
    spectrograms = tf.abs(stfts)

    # Warp the linear scale spectrograms into the mel-scale.
    # A real-input FFT of size fft_length has fft_length // 2 + 1 unique bins;
    # computing it directly avoids relying on the static shape of `stfts`.
    num_spectrogram_bins = fft_length // 2 + 1
    upper_edge_hertz = sample_rate / 2
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate,
        lower_edge_hertz, upper_edge_hertz)
    mel_spectrograms = tf.tensordot(
        spectrograms, linear_to_mel_weight_matrix, 1)
    # Re-attach the static shape: leading dims of the spectrogram followed by
    # the mel-bin dimension of the weight matrix (tensordot may not infer it).
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    # Compute a stabilized log to get log-magnitude mel-scale spectrograms;
    # the small offset keeps log() finite for silent frames.
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    # Compute MFCCs from log_mel_spectrograms.
    return tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
if __name__ == '__main__':
    # Local import: only the script entry point needs the command line.
    import sys

    # WAV file to process: first command-line argument if given, otherwise
    # fall back to the original default of 'test.wav'.
    wav_path = sys.argv[1] if len(sys.argv) > 1 else 'test.wav'
    input_file = tf.constant(wav_path)

    # Read the encoded file and compute the MFCCs.
    audio = tf.io.read_file(input_file)
    mfccs = audio_to_mfccs(audio)

    # Print only the first 20 coefficients along the last axis.
    print(mfccs[..., :20])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment