Skip to content

Instantly share code, notes, and snippets.

@m4p
Created June 17, 2019 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m4p/9631ea6557fc56bd8bf100c3ecf8eff0 to your computer and use it in GitHub Desktop.
Save m4p/9631ea6557fc56bd8bf100c3ecf8eff0 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python3
from pydub import AudioSegment
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS
import sys
import numpy
from scipy.io import wavfile
from scipy.signal import fftconvolve
def usage():
    """Abort the program with the command-line usage synopsis.

    Raises:
        SystemExit: always; the usage text is passed to ``sys.exit`` as the
            exit message.
    """
    sys.exit("Usage: double_ender_sync master.wav sync.wav sync2.wav ...")
# All durations below are in milliseconds, the unit pydub uses for len()
# and for slicing an AudioSegment.
MIN_OFFSET_MS = 5 * 60 * 1000      # minimum headroom when the lengths are close
SAMPLE_LENGTH_MS = 10 * 60 * 1000  # length of the sync excerpt scanned for speech
MIN_NEEDLE_MS = 2 * 1000           # a speech segment must be longer than this


def pick_needle_segment(segments, min_length_ms=MIN_NEEDLE_MS):
    """Return the first segment that is long enough to serve as a needle.

    Args:
        segments: iterable of ``(start, end)`` pairs expressed in seconds
            (the script feeds it the result of ``aS.silenceRemoval``).
        min_length_ms: a segment qualifies only if it is strictly longer
            than this many milliseconds.

    Returns:
        ``(start_ms, end_ms)`` of the first qualifying segment, relative to
        the audio the segments were computed from, or ``None`` when no
        segment qualifies.
    """
    for segment in segments:
        start_ms = segment[0] * 1000
        end_ms = segment[1] * 1000
        if end_ms - start_ms > min_length_ms:
            return start_ms, end_ms
    return None


def locate_needle(haystack, needle):
    """Return the sample index in ``haystack`` where ``needle`` fits best.

    The best fit is the position minimising the squared difference between
    the needle and the equally long haystack window starting there.
    Search code adapted from wavgrep.py
    (https://gist.github.com/patrakov/8a8095721ee81d49f16c)

    Args:
        haystack: 1-D sequence of samples to search in.
        needle: 1-D sequence of samples to search for.

    Returns:
        The 0-based index of the best match as an ``int``.

    Raises:
        ValueError: if the needle is empty or longer than the haystack.
    """
    needle = numpy.asarray(needle, dtype=numpy.float64)
    haystack = numpy.asarray(haystack, dtype=numpy.float64)
    needle_len = len(needle)
    haystack_len = len(haystack)
    if needle_len == 0 or needle_len > haystack_len:
        raise ValueError("The needle must be non-empty and fit inside the search area")

    needle_norm = needle.dot(needle)
    # Prefix sums of the squared samples give the energy of every
    # needle-sized window as a difference of two entries.
    haystack_squared = numpy.hstack(([0.0], haystack * haystack))
    haystack_cum_norm = numpy.cumsum(haystack_squared)
    haystack_norm_at = (haystack_cum_norm[needle_len:haystack_len + 1]
                        - haystack_cum_norm[0:haystack_len + 1 - needle_len])
    # Convolving with the reversed needle yields the cross-correlation.
    correlation_at = fftconvolve(haystack, needle[::-1], mode='valid')
    # |h - n|^2 = |h|^2 + |n|^2 - 2 * <h, n>
    difference_norm_at = haystack_norm_at + needle_norm - 2 * correlation_at
    return int(numpy.argmin(difference_norm_at))


def sync_track(master, master_name, sync_filename, filenumber):
    """Align one recording to the master and write ``synced-track<N>.wav``.

    Writes the intermediate files ``search_area.wav``, ``sample_area.wav``
    and ``needle.wav`` to the current directory and terminates the program
    through ``sys.exit`` when no usable alignment can be computed.

    Args:
        master: mono ``AudioSegment`` of the master recording.
        master_name: file name of the master, used for progress output only.
        sync_filename: path of the recording to align.
        filenumber: number used in the output file name.
    """
    print("Syncing %s to %s" % (sync_filename, master_name))
    # NOTE(review): `sync` itself becomes the mono version, so the exported
    # track is mono as well -- confirm that this is intended.
    sync = AudioSegment.from_file(sync_filename).set_channels(1)

    # First reduce file sizes by only looking at relevant areas
    offset = abs(len(sync) - len(master)) * 1.05
    # If there's less than 5 minutes difference, give us a bit more headroom.
    if offset < MIN_OFFSET_MS:
        offset = MIN_OFFSET_MS
    search_area = master[:offset * 2]
    sample_area = sync[offset:SAMPLE_LENGTH_MS + offset]
    if len(sample_area) == 0:
        sys.exit("%s is too short to take a sample from" % sync_filename)
    search_area.export("search_area.wav", format="wav")
    sample_area.export("sample_area.wav", format="wav")

    # Segment sample area into speech bits and use first one to locate within
    [Fs, x] = aIO.readAudioFile("sample_area.wav")
    segments = aS.silenceRemoval(x, Fs, 0.05, 0.05, 1.0, 0.8, False)
    needle_span = pick_needle_segment(segments)
    if needle_span is None:
        # Without this check a needle.wav left over from an earlier file
        # would be matched instead.
        sys.exit("No speech segment longer than 2 seconds found in %s" % sync_filename)
    start, end = needle_span
    # Segment times are relative to sample_area, which begins `offset` ms
    # into the sync track; only the chosen segment's start counts.
    needle_abs_index = offset + start
    print("Found a needle")
    sample_area[start:end].export("needle.wav", format="wav")

    needle_rate, needle = wavfile.read("needle.wav")
    haystack_rate, haystack = wavfile.read("search_area.wav")
    if needle_rate != haystack_rate:
        sys.exit("Sample rates are not the same")
    needle = numpy.array(needle, dtype=numpy.float64)
    if needle.dot(needle) < 1000.0:
        sys.exit("The needle is almost silent")
    try:
        at = locate_needle(haystack, needle)
    except ValueError as err:
        sys.exit(str(err))

    # Calculate diffs and write synced file.
    needle_master_ms = at / haystack_rate * 1000
    # Positive: sync started before the master and must be trimmed.
    # Negative: sync started after the master and must be padded.
    time_offset = needle_abs_index - needle_master_ms
    print("Absolute needle pos: %d" % needle_abs_index)
    print("The needle starts at ms: %d" % round(needle_master_ms))
    print("Time Offset: %d seconds" % round(abs(time_offset) / 1000))
    if time_offset >= 0:
        synced = sync[time_offset:]
    else:
        padding = AudioSegment.silent(duration=int(round(-time_offset)),
                                      frame_rate=sync.frame_rate)
        synced = padding + sync
    synced.export("synced-track%d.wav" % filenumber, format="wav")


def main(argv=None):
    """Sync every file named after the master on the command line.

    Args:
        argv: argument vector laid out like ``sys.argv`` (program name,
            master file, one or more files to sync); defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        usage()
    master = AudioSegment.from_wav(argv[1]).set_channels(1)
    for filenumber, sync_filename in enumerate(argv[2:], start=1):
        sync_track(master, argv[1], sync_filename, filenumber)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment