Skip to content

Instantly share code, notes, and snippets.

@schollz
Created January 31, 2022 22:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schollz/cde86c4038764f7a461a577aa5319a32 to your computer and use it in GitHub Desktop.
Save schollz/cde86c4038764f7a461a577aa5319a32 to your computer and use it in GitHub Desktop.
# python3 depop.py file1.wav file1_depopped.wav
__author__ = "ezra buchla, zack scholl"
import shutil
from scipy import signal
from scipy.io import wavfile
import numpy as np
# Set to True to print the detected peak indices and plot the averaged
# band level with the peaks marked (see depop).
debug_depop = False
if debug_depop:
    # matplotlib is only needed for the debug plot, so import it lazily
    import matplotlib.pyplot as plt
# (adapted from ezra's excise.m, all mistakes are from zack)
# %% excise a portion of given (audio) signal with crossfade
# %%
# %% it is assumed that the exact final duration is flexible,
# %% so a fade time is programmatically determined
# %%
# %% parameters:
# %% a, b: first and last indices of region to excise
# %% nw: window length (maximum crossfade duration in samples)
# %%
# %% returns:
# %% Y: new signal
# %% (unlike the original header suggests, this port returns only Y;
# %% the correlation peak value c and the actual count of crossfade
# %% samples nf are not returned)
def excise(X, a, b, nw=1000):
    """Cut a region out of a 1-D signal and crossfade across the cut.

    A window of up to ``nw`` samples before ``a`` is cross-correlated with
    a window of the same size starting at ``b``.  The non-negative lag with
    the highest correlation decides how many samples ``nf`` are crossfaded,
    so that the two sides line up as well as possible.

    Args:
        X: 1-D numpy array holding the signal.
        a: index where the excised region starts.
        b: index where the excised region ends (the trailing window
            starts at this index).
        nw: maximum crossfade length in samples.

    Returns:
        A new array ``X[:a-nf] + fade + X[b+nf:]`` where ``fade`` is the
        linear crossfade of ``X[a-nf:a]`` into ``X[b:b+nf]``.  It is
        ``(b - a) + nf`` samples shorter than ``X``.  When there is no room
        for a window on either side, ``nf`` is 0 and the region is simply
        cut out.
    """
    # restrict window size to the samples available on both sides
    # (the a-1 bound is kept as-is; presumably inherited from the
    # 1-indexed original).  Clamp at zero: near the signal edges the
    # bounds go to 0 or negative, which used to produce empty windows
    # and a failure in the correlation / argmax below.
    nw = max(0, min(nw, a - 1, len(X) - b))

    if nw > 0:
        # windows preceding and following the excision,
        # which we examine for correlation
        before = X[a - nw:a]
        after = X[b:b + nw]

        # find lag time maximizing cross-correlation
        corr = signal.correlate(before, after, 'full')
        lags = signal.correlation_lags(before.size, after.size, mode="full")

        # only non-negative lags are considered
        keep = lags >= 0
        corr = corr[keep]
        lags = lags[keep]
        best_lag = int(lags[np.argmax(corr)])

        # xfade duration
        nf = nw - best_lag
    else:
        # no room to crossfade: fall back to a plain cut
        nf = 0

    # FIXME:
    # should choose interpolation type based on degree of correlation
    # (linear if totally correlated, equal-power if uncorrelated)
    # for now, just use linear
    phi = np.linspace(0, 1, nf)
    fade = np.multiply(X[b:b + nf], phi) + np.multiply(X[a - nf:a], 1 - phi)

    # initial segment, crossfaded segment, ending segment
    return np.concatenate((X[:a - nf], fade, X[b + nf:]))
def depop(filename, newfilename, channel=0):
    """Find the first pop on one channel of a WAV file and excise it.

    The STFT of the selected channel is averaged over its "soft" frequency
    bands; prominent peaks in that average are pop candidates.  For the
    first accepted candidate a roughly 6 ms region is excised (with
    crossfade) from every channel and the result is written out.

    Args:
        filename: WAV file to read.
        newfilename: WAV file to write when a pop was removed (may be the
            same path as ``filename``).
        channel: zero-based channel to analyse for pops.

    Returns:
        ``"removed a pop at <time>"`` when a pop was excised and the file
        written, or ``""`` when nothing was removed (no file is written in
        that case).  At most one pop is removed per call.
    """
    # read in file
    samplerate, data = wavfile.read(filename)

    # collect data-type information and normalize
    original_type = data.dtype
    is_integer = np.issubdtype(original_type, np.integer)
    # np.iinfo raises on floating-point WAV data, which needs no scaling
    data_max = np.iinfo(original_type).max if is_integer else 1.0
    data = data.astype(float) / data_max

    # mono files are read as a 1-D array; treat them as a single column
    is_mono = data.ndim == 1
    if is_mono:
        data = data[:, np.newaxis]

    # compute basic properties
    num_samples, num_channels = data.shape
    if channel >= num_channels:
        # nothing to analyse on a channel the file does not have
        return ""
    length = num_samples / samplerate
    time = np.linspace(0., length, num_samples)

    # stft of the channel
    # go through each band and, if its soft,
    # collect it to be averaged later
    _, t, Zxx = signal.stft(data[:, channel], samplerate,
                            window=signal.get_window("hann", 128),
                            nperseg=128)
    zmean = np.zeros(Zxx.shape[1])
    znum = 0
    for band in Zxx:
        z = 20 * np.log10(np.abs(band))
        z = z - np.mean(z[-500:])
        # find soft audio regions and use them for the average
        # arbitrary but works okay
        if np.sum(z) < 10000:
            zmean = np.add(zmean, z)
            znum += 1
    if znum == 0:
        # no soft band found: nothing to average (avoids dividing by zero)
        return ""

    # average all the soft bands; the last 500 frames serve as the
    # reference for the baseline level and its spread
    zmean = zmean / znum
    zmean = zmean - np.mean(zmean[-500:])
    z_std = np.std(zmean[-500:])
    peaks, _ = signal.find_peaks(zmean, height=z_std * 6, prominence=10)
    if debug_depop:
        print(peaks)
        plt.plot(t, zmean)
        plt.plot(t[peaks], zmean[peaks], 'o')
        plt.title("ch {}".format(channel))
        plt.show()

    for ind in peaks:
        # ignore candidates within 5 ms of either end of the file
        if t[ind] < 0.005 or t[ind] > length - 0.005:
            continue
        # candidates scoring below 5 standard deviations are not pops
        # (find_peaks above already asks for 6, so this is a safety net)
        score = zmean[ind] / z_std
        if score < 5:
            if debug_depop:
                print("too low score: {}".format(score))
            continue
        # assume a pop width of 6 ms
        region = np.flatnonzero(
            np.logical_and(time > t[ind] - 0.003, time < t[ind] + 0.003))
        if region.size == 0:
            continue
        first, last = region[0], region[-1]

        # excise the same region from every channel; the crossfade length
        # can differ per channel, so trim all of them to the shortest
        excised = [excise(data[:, ch], first, last)
                   for ch in range(num_channels)]
        newlen = min(len(chan) for chan in excised)
        newdata = np.column_stack([chan[:newlen] for chan in excised])
        newdata = newdata * data_max
        if is_integer:
            # round (astype alone truncates toward zero) and stay in range
            info = np.iinfo(original_type)
            newdata = np.clip(np.rint(newdata), info.min, info.max)
        if is_mono:
            newdata = newdata[:, 0]
        wavfile.write(newfilename, samplerate, newdata.astype(original_type))
        return "removed a pop at {}".format(t[ind])
    return ""
def depop_file(filename, newfilename=""):
    """Remove pops from a WAV file, one pop at a time.

    The input is copied to the output path and ``depop`` is then run on
    that copy repeatedly: for each of the first two channels, up to ten
    pops are removed.  A message is printed for every pop removed.

    Args:
        filename: WAV file to clean.
        newfilename: where to write the cleaned file.  When empty, the
            work is done on a temporary copy which then replaces
            ``filename``.
    """
    import os
    import tempfile

    # Track in-place mode explicitly instead of comparing against a
    # sentinel file name: an explicit output called "foo.wav" used to be
    # mistaken for in-place mode and overwrote the input file.
    in_place = not newfilename
    if in_place:
        # private temporary file instead of a fixed "foo.wav" in the cwd
        handle, newfilename = tempfile.mkstemp(suffix=".wav")
        os.close(handle)
    try:
        shutil.copy(filename, newfilename)
        for channel in range(2):
            # at most ten pops per channel; depop removes one per call
            for _ in range(10):
                result = depop(newfilename, newfilename, channel)
                if result == "":
                    break
                print(result + " in file " + filename + ", channel {}".format(channel+1))
        if in_place:
            shutil.copy(newfilename, filename)
    finally:
        if in_place:
            os.remove(newfilename)
# command-line entry point; guarded so importing this module has no side effects
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: python3 depop.py input.wav [output.wav]")
    filename = sys.argv[1]
    # the output path is optional; without it the input file is depopped in place
    filename2 = sys.argv[2] if len(sys.argv) > 2 else ""
    depop_file(filename, filename2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment