Skip to content

Instantly share code, notes, and snippets.

@lynzrand
Created April 4, 2023 04:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lynzrand/22d1c191fe7b92035cdb254c2359958e to your computer and use it in GitHub Desktop.
Save lynzrand/22d1c191fe7b92035cdb254c2359958e to your computer and use it in GitHub Desktop.
black==23.1.0
click==8.1.3
contourpy==1.0.7
cycler==0.11.0
fonttools==4.39.2
kiwisolver==1.4.4
matplotlib==3.7.1
mypy-extensions==1.0.0
numpy==1.24.2
packaging==23.0
pathspec==0.11.1
Pillow==9.4.0
pip==23.0.1
platformdirs==3.1.1
pydub==0.25.1
pyparsing==3.0.9
python-dateutil==2.8.2
scipy==1.10.1
setuptools==65.5.0
six==1.16.0
tomli==2.0.1
import math
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import scipy.fftpack as fftpack
import scipy.io.wavfile as wavfile
from PIL import Image
# Pixel encoding of the magnitude image: 0 maps to -100 dB and 255 to 0 dB.
DB_RANGE = 100.0
# Fraction of a window between the starts of consecutive frames (50% overlap).
HOP_FRACTION = 0.5
OUTPUT_SAMPLE_RATE = 44100
INT16_MAX = 2**15 - 1


def pixels_to_magnitude(pixels):
    """Convert 8-bit dB-scaled pixels back to linear FFT magnitudes.

    Pixel 255 decodes to 0 dB (magnitude 1.0). Pixel 0 is the -100 dB floor
    and is decoded as exact silence.

    Args:
        pixels: Array-like of grayscale values in ``0..255``.

    Returns:
        Float array of the same shape holding linear magnitudes.
    """
    pixels = np.asarray(pixels)
    decibels = pixels.astype(float) / 255 * DB_RANGE - DB_RANGE
    magnitude = 10 ** (decibels / 20)
    # The smallest decodable magnitude is 10 ** (-100 / 20) == 1e-5, so a
    # numeric threshold below that never matches; key on the floor pixel.
    magnitude[pixels == 0] = 0
    return magnitude


def hue_to_phase(hue):
    """Map an 8-bit hue channel (``0..255``) to phase angles in ``[-pi, pi]``."""
    return np.asarray(hue).astype(float) / 255 * 2 * np.pi - np.pi


def overlap_add(magnitude, phase, hop_size):
    """Inverse-FFT every frame and sum the frames at ``hop_size`` spacing.

    Args:
        magnitude: 2-D array-like, one row of FFT magnitudes per frame.
        phase: 2-D array-like of phase angles, same shape as ``magnitude``.
        hop_size: Number of samples between the starts of adjacent frames.

    Returns:
        1-D float array of length ``(frames - 1) * hop_size + window_size``
        (empty when there are no frames).

    Raises:
        ValueError: If ``magnitude`` and ``phase`` differ in shape.
    """
    magnitude = np.asarray(magnitude, dtype=float)
    phase = np.asarray(phase, dtype=float)
    if magnitude.shape != phase.shape:
        raise ValueError(
            f"magnitude shape {magnitude.shape} != phase shape {phase.shape}"
        )
    num_frames, window_size = magnitude.shape
    if num_frames == 0:
        return np.zeros(0)

    samples = np.zeros((num_frames - 1) * hop_size + window_size)
    for index, (frame_magnitude, frame_phase) in enumerate(zip(magnitude, phase)):
        spectrum = frame_magnitude * np.exp(1j * frame_phase)
        # Quantised images are not exactly Hermitian, so the inverse FFT has
        # a small imaginary part; only the real part is audio.
        frame = fftpack.ifft(spectrum).real
        start = index * hop_size
        samples[start:start + window_size] += frame
    return samples


def to_int16(samples):
    """Scale ``[-1, 1]`` floats to 16-bit PCM, clipping out-of-range values.

    Overlapping frames can sum past full scale; clipping avoids the
    wrap-around that a bare ``astype(np.int16)`` would produce.
    """
    scaled = np.asarray(samples, dtype=float) * INT16_MAX
    return np.clip(scaled, -INT16_MAX - 1, INT16_MAX).astype(np.int16)


def reconstruct_audio(
    spectrogram_path="spectrogram.png",
    phase_path="phase.png",
    output_path="output.wav",
    samplerate=OUTPUT_SAMPLE_RATE,
):
    """Rebuild a WAV file from a magnitude image and a phase image.

    Each image row is one FFT frame, so the image width is the window size.

    Args:
        spectrogram_path: Grayscale PNG holding dB-scaled magnitudes.
        phase_path: Colour PNG whose hue encodes the phase angle.
        output_path: Destination WAV file.
        samplerate: Sample rate written to the WAV header.
    """
    with Image.open(spectrogram_path) as spectrogram_img:
        magnitude = pixels_to_magnitude(np.asarray(spectrogram_img.convert("L")))
    with Image.open(phase_path) as phase_img:
        phase = hue_to_phase(np.asarray(phase_img.convert("HSV"))[:, :, 0])

    window_size = magnitude.shape[1]
    hop_size = int(window_size * HOP_FRACTION)
    samples = overlap_add(magnitude, phase, hop_size)
    wavfile.write(output_path, samplerate, to_int16(samples))


if __name__ == "__main__":
    reconstruct_audio()
import math
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import scipy.io.wavfile as wavfile
import scipy.fftpack as fftpack
from PIL import Image
# Analysis parameters: frame length in samples and fractional frame overlap.
WINDOW_SIZE = 1024
OVERLAP = 0.5


def normalize_pcm(data):
    """Scale raw WAV samples to floats in ``[-1, 1]``.

    Floating-point data is returned unscaled, unsigned 8-bit data is
    re-centred around zero, and signed integers are divided by their
    full-scale value (``2**15`` for int16).
    """
    data = np.asarray(data)
    if np.issubdtype(data.dtype, np.floating):
        return data.astype(float)
    if data.dtype == np.uint8:
        return (data.astype(float) - 128) / 128
    return data.astype(float) / (float(np.iinfo(data.dtype).max) + 1)


def stft(samples, window_size=WINDOW_SIZE, overlap=OVERLAP):
    """Compute Blackman-windowed FFT magnitude and phase for every frame.

    Frames start every ``int(window_size * (1 - overlap))`` samples; frames
    that run past the end of ``samples`` are zero-padded.

    Args:
        samples: 1-D array-like of audio samples.
        window_size: Frame length (and FFT size) in samples.
        overlap: Fraction of each frame shared with the next one.

    Returns:
        ``(magnitude, phase)``, two float arrays of shape
        ``(num_windows, window_size)``.

    Raises:
        ValueError: If the overlap leaves a hop of less than one sample.
    """
    samples = np.asarray(samples, dtype=float)
    hop_size = int(window_size * (1 - overlap))
    if hop_size < 1:
        raise ValueError("overlap leaves a hop size of less than one sample")

    window = signal.windows.blackman(window_size)
    num_windows = int(np.ceil(len(samples) / hop_size))
    # One window of trailing zeros is always enough padding, because the
    # last frame starts before the end of the input.
    padded = np.concatenate((samples, np.zeros(window_size)))

    magnitude = np.zeros((num_windows, window_size))
    phase = np.zeros((num_windows, window_size))
    for index in range(num_windows):
        start = index * hop_size
        spectrum = fftpack.fft(padded[start:start + window_size] * window)
        magnitude[index] = np.abs(spectrum)
        phase[index] = np.angle(spectrum)
    return magnitude, phase


def magnitude_to_pixels(magnitude):
    """Map linear magnitudes to 8-bit pixels (0 = -100 dB, 255 = 0 dB).

    Values outside the 100 dB range saturate, so magnitudes above 1.0 all
    become 255.
    """
    # The small offset keeps log10 finite for silent bins.
    decibels = 20 * np.log10(np.asarray(magnitude, dtype=float) + 1e-10)
    scaled = (decibels + 100) / 100 * 255
    return np.clip(scaled, 0, 255).astype(np.uint8)


def phase_to_hue(phase):
    """Map phase angles in ``[-pi, pi]`` to 8-bit hue values ``0..255``."""
    hue = (np.asarray(phase, dtype=float) + np.pi) / (2 * np.pi) * 255
    return hue.astype(np.uint8)


def render_images(
    input_path="input.wav",
    spectrogram_path="spectrogram.png",
    phase_path="phase.png",
):
    """Write the magnitude and phase of a WAV file as two PNG images.

    Each image row is one FFT frame. The magnitude image is grayscale and
    the phase image stores the angle as hue at full saturation and value.

    Args:
        input_path: WAV file to analyse.
        spectrogram_path: Destination for the grayscale magnitude PNG.
        phase_path: Destination for the hue-coded phase PNG.
    """
    _samplerate, data = wavfile.read(input_path)
    if data.ndim > 1:
        # Multi-channel audio: analyse the first channel only.
        data = data[:, 0]
    magnitude, phase = stft(normalize_pcm(data))

    spectrogram_image = Image.fromarray(magnitude_to_pixels(magnitude), "L")

    hue = phase_to_hue(phase)
    # S = V = 100% for all pixels.
    full = np.full(hue.shape, 255, dtype=np.uint8)
    phase_image = Image.fromarray(np.dstack((hue, full, full)), "HSV")
    # PNG cannot store HSV, so convert to RGB before saving.
    phase_image = phase_image.convert("RGB")

    spectrogram_image.save(spectrogram_path, "PNG")
    phase_image.save(phase_path, "PNG")


if __name__ == "__main__":
    render_images()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment