#!/usr/bin/env python3
#
# Installation
# ~~~~~~~~~~~~
# workon work3.9
#
# brew install ffmpeg
# pip install ffmpeg # For "import pydub"
# pip install matplotlib==3.3.3 # Prevent "Segmentation: 11" fault
# pip install numpy
# pip install pydub
# pip install pyqt5 # Required for ... matplotlib.use("Qt5Agg")
#
# To Do
# ~~~~~
# - Visualize using OpenCV
# - Take input from laptop microphone or some other real-time source
# - Over time as frames ...
# - Extract significant frequencies (by magnitude)
# - Show frequencies as musical notes (chords), e.g. 440 Hz --> A4 (see sketch at end of file)
# - Graph chord progression over time
# - Implement audio generation, e.g. multiple sine waves and amplitudes (see sketch at end of file)
import matplotlib
# gui_env = ["WXAgg", "Qt5Agg", "TKAgg", "GTKAgg"]
# for gui in gui_env:
#     try:
#         print("matplotlib test:", gui)
#         matplotlib.use(gui, warn=False, force=True)
#         from matplotlib import pyplot as plt
#         break
#     except:
#         continue
# Failed attempt to fix Mac OS X renderer issue
matplotlib.use("Qt5Agg") # Fails "Agg", "Qt5Agg", "WXAgg", "TKAgg"
print("matplotlib using:", matplotlib.get_backend())
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import cv2
import numpy as np
from pydub import AudioSegment
import sys
audio_pathname = "sine_440.mp3"
if len(sys.argv) > 1:
    audio_pathname = sys.argv[1]
def plotter(frequencies, magnitudes):
    if False:
        plt.plot(frequencies, magnitudes)
    else:
        figure, ax = plt.subplots(1, 1)
        ax.plot(frequencies, magnitudes)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(400))
        ax.xaxis.set_minor_locator(ticker.MultipleLocator(100))
    plt.xlabel("Frequency (Hz)")
    plt.ylabel("Magnitude")
    plt.xlim([-1600, 1600])
    if True:
        plt.show()
    else:
        # Render the figure, then wrap its raw RGB pixel buffer as an OpenCV image
        canvas = plt.gcf().canvas
        canvas.draw()
        width, height = canvas.get_width_height()
        image = np.frombuffer(canvas.tostring_rgb(), np.uint8).reshape(height, width, 3)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # matplotlib RGB --> OpenCV BGR
        cv2.imwrite("test.jpg", image)

        ## Approach 1
        # canvas = matplotlib.backends.backend_macosx.FigureCanvasMac(plt.gcf())
        # buffer = canvas.renderer.buffer_rgba()

        ## Approach 2
        # canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(plt.gcf())
        # buffer = canvas.buffer_rgba()

        ## Common code for approach 1 and 2
        # image = np.frombuffer(buffer, np.uint8)
        ## Reshape the flat buffer into a (height, width, 4) RGBA image
        # image = image.reshape((canvas.get_width_height()[::-1] + (4,)))

        ## Approach 3
        # buffer = plt.gcf().canvas.renderer.buffer_rgba()
        # image = np.frombuffer(buffer, np.uint8)
        ## Reshape the flat buffer into a (height, width, 4) RGBA image
        # image = image.reshape((plt.gcf().canvas.get_width_height()[::-1] + (4,)))

        # cv2.imshow("Plot", image)
        # cv2.waitKey(0)
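# Hedged sketch (not part of the original gist): a standalone helper that makes the
# commented "Approach 2/3" above concrete, drawing a figure on an off-screen Agg
# canvas and converting its RGBA buffer to a BGR array that cv2.imshow() accepts.
# The helper name figure_to_opencv_image() is illustrative only.
from matplotlib.backends.backend_agg import FigureCanvasAgg

def figure_to_opencv_image(figure):
    canvas = FigureCanvasAgg(figure)
    canvas.draw()                                  # Render the figure off-screen
    width, height = canvas.get_width_height()
    rgba = np.frombuffer(canvas.buffer_rgba(), np.uint8).reshape(height, width, 4)
    return cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGR)  # Drop alpha, reorder to BGR

# Example usage (assumes a figure has already been plotted):
#   image = figure_to_opencv_image(plt.gcf())
#   cv2.imshow("Plot", image)
#   cv2.waitKey(0)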
audio = AudioSegment.from_file(audio_pathname, format="mp3")
samples = np.array(audio.get_array_of_samples())  # Note: channels are interleaved for stereo input
sample_rate = audio.frame_rate
chunk_length = len(samples) / audio.frame_rate  # Total duration in seconds (one 1-second chunk each)
print(f"frame_rate: {audio.frame_rate}")
print(f"samples : {len(samples)}")
print(f"chunk_length: {chunk_length}")
num_chunks = int(chunk_length)
chunks = [samples[i*sample_rate:(i+1)*sample_rate] for i in range(num_chunks)]
for chunk in chunks:
    spectrum = np.fft.fft(chunk)
    magnitudes = np.abs(spectrum)  # Magnitude of each FFT bin
    dominant_index = np.argmax(magnitudes)
    frequencies = np.fft.fftfreq(chunk.size, 1 / sample_rate)
    dominant_frequency = np.abs(frequencies[dominant_index])
    print(f"Dominant frequency: {dominant_frequency} Hz")
    # plotter(frequencies, magnitudes)
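# Hedged sketches for two of the To Do items above (not part of the original gist).
# The helper names frequency_to_note() and generate_sine_mix(), and the output
# filename "sine_mix.mp3", are illustrative only.
import math

NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

def frequency_to_note(frequency_hz, a4_hz=440.0):
    # Distance from A4 (MIDI note 69) in equal-tempered semitones, rounded to the nearest note
    semitones = round(12 * math.log2(frequency_hz / a4_hz))
    midi_note = 69 + semitones
    octave = midi_note // 12 - 1
    return f"{NOTE_NAMES[midi_note % 12]}{octave}"
    # frequency_to_note(440.0) --> "A4", frequency_to_note(261.63) --> "C4"
    # e.g. print(frequency_to_note(dominant_frequency)) inside the chunk loop above

def generate_sine_mix(pathname="sine_mix.mp3", duration=2.0, rate=44100,
                      tones=((440.0, 1.0), (880.0, 0.5))):
    # Sum a few (frequency, amplitude) sine waves, normalise to 16-bit range,
    # then export via pydub (requires the ffmpeg binary installed above)
    t = np.arange(int(duration * rate)) / rate
    signal = sum(amplitude * np.sin(2 * np.pi * frequency * t)
                 for frequency, amplitude in tones)
    samples = np.int16(signal / np.max(np.abs(signal)) * 32767)
    segment = AudioSegment(samples.tobytes(), frame_rate=rate,
                           sample_width=2, channels=1)
    segment.export(pathname, format="mp3")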