# mauri870/tensorflow_audio_to_mfcc.py

## tensorflow_audio_to_mfcc.py
import tensorflow as tf
# FIXME: audio_ops.decode_wav is deprecated, use tensorflow_io.IOTensor.from_audio
from tensorflow.contrib.framework.python.ops import audio_ops

# Enable eager execution for a more interactive frontend.
# If using the default graph mode, you'll probably need to run in a session.
# NOTE: this call sits outside the `__main__` guard, so it runs whenever the
# module is imported, not only when the file is executed as a script.
tf.enable_eager_execution()

@tf.function
def audio_to_mfccs(
        audio_contents,
        channels=1,
        sample_rate=8000
    ):
    """Compute MFCCs from the encoded contents of a WAV file.

    Pipeline: decode WAV -> STFT magnitude -> mel filterbank ->
    stabilized log -> MFCCs.

    Args:
        audio_contents: The WAV-encoded file contents, as passed to
            `audio_ops.decode_wav` (e.g. the result of `tf.io.read_file`).
        channels: Number of channels requested from the decoder
            (`desired_channels`).
        sample_rate: Sample rate in Hz used to build the mel filterbank;
            the upper mel edge is set to half of it.
            NOTE(review): the rate decoded from the file
            (`waveform.sample_rate`) is never consulted, so this value
            presumably has to match the file's real rate -- confirm.

    Returns:
        The tensor produced by `tf.signal.mfccs_from_log_mel_spectrograms`
        for the 128-bin log-mel spectrogram. Presumably shaped
        (channels, frames, 128) -- TODO confirm.
    """
    frame_length = 1024
    frame_step = 256

    waveform = audio_ops.decode_wav(
        audio_contents, desired_channels=channels)

    # The STFT frames the innermost axis, so the decoded audio is transposed
    # to put samples last (assumes decode_wav yields (samples, channels)).
    # tf.signal.stft is used instead of the tf.contrib alias so that every
    # signal op in this function comes from the same namespace.
    stfts = tf.signal.stft(
        tf.transpose(waveform.audio),
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length)

    spectrograms = tf.abs(stfts)

    # Warp the linear scale spectrograms into the mel-scale.
    # as_list() returns plain ints whether TensorShape entries are Dimension
    # objects (v1 behaviour) or ints (v2 behaviour); `.value` only exists on
    # the former.
    num_spectrogram_bins = stfts.shape.as_list()[-1]
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80, sample_rate / 2, 128
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate,
        lower_edge_hertz, upper_edge_hertz)
    mel_spectrograms = tf.tensordot(
        spectrograms, linear_to_mel_weight_matrix, 1)
    # tensordot can lose the static shape; restore it from the operands.
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
    # The small offset keeps log() finite for silent (all-zero) bins.
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    # Compute MFCCs from log_mel_spectrograms.
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
        log_mel_spectrograms)

    return mfccs

if __name__ == '__main__':
    # Read the raw WAV bytes and run them through the MFCC pipeline.
    wav_path = tf.constant('test.wav')
    mfccs = audio_to_mfccs(tf.io.read_file(wav_path))

    # Show only the first 20 entries along the last axis.
    leading_coefficients = mfccs[..., :20]
    print(leading_coefficients)
	import tensorflow as tf
	# FIXME: audio_ops.decode_wav is deprecated, use tensorflow_io.IOTensor.from_audio
	from tensorflow.contrib.framework.python.ops import audio_ops

	# Enable eager execution for a more interactive frontend.
	# If using the default graph mode, you'll probably need to run in a session.
	# NOTE(review): this tab-indented section appears to repeat the statements
	# at the top of the file -- confirm whether the duplicate is intended.
	tf.enable_eager_execution()

	@tf.function
	def audio_to_mfccs(
	        audio_contents,
	        channels=1,
	        sample_rate=8000
	    ):
	    """Compute MFCCs from the encoded contents of a WAV file.

	    Pipeline: decode WAV -> STFT magnitude -> mel filterbank ->
	    stabilized log -> MFCCs.

	    Args:
	        audio_contents: The WAV-encoded file contents, as passed to
	            `audio_ops.decode_wav` (e.g. the result of `tf.io.read_file`).
	        channels: Number of channels requested from the decoder
	            (`desired_channels`).
	        sample_rate: Sample rate in Hz used to build the mel filterbank;
	            the upper mel edge is set to half of it.
	            NOTE(review): the rate decoded from the file
	            (`waveform.sample_rate`) is never consulted, so this value
	            presumably has to match the file's real rate -- confirm.

	    Returns:
	        The tensor produced by `tf.signal.mfccs_from_log_mel_spectrograms`
	        for the 128-bin log-mel spectrogram. Presumably shaped
	        (channels, frames, 128) -- TODO confirm.
	    """
	    frame_length = 1024
	    frame_step = 256

	    waveform = audio_ops.decode_wav(
	        audio_contents, desired_channels=channels)

	    # The STFT frames the innermost axis, so the decoded audio is transposed
	    # to put samples last (assumes decode_wav yields (samples, channels)).
	    # tf.signal.stft is used instead of the tf.contrib alias so that every
	    # signal op in this function comes from the same namespace.
	    stfts = tf.signal.stft(
	        tf.transpose(waveform.audio),
	        frame_length=frame_length,
	        frame_step=frame_step,
	        fft_length=frame_length)

	    spectrograms = tf.abs(stfts)

	    # Warp the linear scale spectrograms into the mel-scale.
	    # as_list() returns plain ints whether TensorShape entries are Dimension
	    # objects (v1 behaviour) or ints (v2 behaviour); `.value` only exists on
	    # the former.
	    num_spectrogram_bins = stfts.shape.as_list()[-1]
	    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80, sample_rate / 2, 128
	    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
	        num_mel_bins, num_spectrogram_bins, sample_rate,
	        lower_edge_hertz, upper_edge_hertz)
	    mel_spectrograms = tf.tensordot(
	        spectrograms, linear_to_mel_weight_matrix, 1)
	    # tensordot can lose the static shape; restore it from the operands.
	    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
	        linear_to_mel_weight_matrix.shape[-1:]))

	    # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
	    # The small offset keeps log() finite for silent (all-zero) bins.
	    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

	    # Compute MFCCs from log_mel_spectrograms.
	    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
	        log_mel_spectrograms)

	    return mfccs

	if __name__ == '__main__':
	    # Path of the WAV file to analyse.
	    input_file = tf.constant('test.wav')

	    # Compute the mfccs from the raw file contents.
	    audio = tf.io.read_file(input_file)
	    mfccs = audio_to_mfccs(audio)

	    # Get only the first 20 entries along the last axis.
	    print(mfccs[..., :20])