m4p/double_ender_sync.py

## double_ender_sync.py
#!/usr/local/bin/python3
from pydub import AudioSegment
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS
import sys
import numpy
from scipy.io import wavfile
from scipy.signal import fftconvolve

def usage():
    """Abort the script with the command-line synopsis as the exit message."""
    synopsis = "Usage: double_ender_sync master.wav sync.wav sync2.wav ..."
    raise SystemExit(synopsis)

# At least two arguments are required: the master recording and one or more
# tracks to align against it.
if len(sys.argv) < 3:
    usage()

# Load the reference recording and down-mix it to a single channel.
master = AudioSegment.from_wav(sys.argv[1]).set_channels(1)

# Every remaining argument is a track to be synced to the master.
files_to_sync = sys.argv[2:]

# Running 1-based counter used to name the output files.
filenumber = 0

def _first_long_segment(segments, min_length_ms):
    """Return (start_ms, end_ms) of the first segment longer than min_length_ms.

    `segments` is the list of [start_seconds, end_seconds] pairs returned by
    the silence-removal step. Returns None when no segment is long enough.
    """
    for timeidx in segments:
        start_ms = timeidx[0] * 1000
        end_ms = timeidx[1] * 1000
        if end_ms - start_ms > min_length_ms:
            return start_ms, end_ms
    return None


def _best_match_sample(haystack, needle):
    """Return the sample index in `haystack` where `needle` fits best.

    The best fit is the position that minimises the squared difference
    between the needle and the equally long haystack window.
    Search code adapted from wavgrep.py
    (https://gist.github.com/patrakov/8a8095721ee81d49f16c).

    Exits the script if the needle is almost silent or longer than the
    haystack.
    """
    needle = numpy.array(needle, dtype=numpy.float64)
    haystack = numpy.array(haystack, dtype=numpy.float64)
    needle_len = len(needle)
    haystack_len = len(haystack)

    # A needle longer than the haystack leaves no valid window to compare.
    if needle_len > haystack_len:
        sys.exit("The needle is longer than the search area")

    needle_norm = needle.dot(needle)
    if needle_norm < 1000.0:
        sys.exit("The needle is almost silent")

    # Energy of every needle-sized haystack window via a cumulative sum.
    haystack_squared = numpy.hstack(([0.0], haystack * haystack))
    haystack_cum_norm = numpy.cumsum(haystack_squared)
    haystack_norm_at = (haystack_cum_norm[needle_len:haystack_len + 1]
                        - haystack_cum_norm[0:haystack_len + 1 - needle_len])

    # Convolving with the reversed needle yields the cross-correlation.
    correlation_at = fftconvolve(haystack, needle[::-1], mode='valid')

    # |h - n|^2 = |h|^2 + |n|^2 - 2 * <h, n>
    difference_norm_at = haystack_norm_at + needle_norm - 2 * correlation_at

    return int(numpy.argmin(difference_norm_at))


for filenumber, sync_filename in enumerate(files_to_sync, start=1):

    print("Syncing %s to %s" % (sync_filename, sys.argv[1]))

    sync = AudioSegment.from_file(sync_filename).set_channels(1)

    # First reduce file sizes by only looking at relevant areas.
    # All positions below are in milliseconds.
    offset = abs(len(sync) - len(master)) * 1.05

    # If there's less than 5 minutes difference, give us a bit more headroom.
    if offset < 5 * 60 * 1000:
        offset = 5 * 60 * 1000

    search_area = master[:offset * 2]
    sample_area = sync[offset:10 * 60 * 1000 + offset]

    search_area.export("search_area.wav", format="wav")
    sample_area.export("sample_area.wav", format="wav")

    # Segment sample area into speech bits and use the first one that is
    # longer than two seconds as the needle to locate within the master.
    [Fs, x] = aIO.readAudioFile("sample_area.wav")
    segments = aS.silenceRemoval(x, Fs, 0.05, 0.05, 1.0, 0.8, False)

    picked = _first_long_segment(segments, 2 * 1000)
    if picked is None:
        # Without this check a stale needle.wav from a previous file (or no
        # file at all) would be used for the search below.
        sys.exit("No speech segment longer than 2 seconds found in %s"
                 % sync_filename)
    start, end = picked

    print("Found a needle")
    sample_area[start:end].export("needle.wav", format="wav")

    # Position of the needle within the full sync track: only the chosen
    # segment's start counts, not the starts of the segments skipped before it.
    needle_abs_index = offset + start

    needle_rate, needle = wavfile.read("needle.wav")
    haystack_rate, haystack = wavfile.read("search_area.wav")

    if needle_rate != haystack_rate:
        sys.exit("Sample rates are not the same")

    at = _best_match_sample(haystack, needle)
    match_ms = at / haystack_rate * 1000

    # Calculate diffs and write synced file.
    # Positive: the sync track has extra lead-in that must be trimmed.
    # Negative: the sync track started later than the master and needs
    # leading silence instead.
    time_offset = needle_abs_index - match_ms

    print("Absolute needle pos: %d" % needle_abs_index)
    print("The needle starts at ms: %d" % round(match_ms))
    print("Time Offset: %d seconds" % round(abs(time_offset) / 1000))

    if time_offset >= 0:
        synced = sync[time_offset:]
    else:
        padding = AudioSegment.silent(duration=int(round(-time_offset)),
                                      frame_rate=sync.frame_rate)
        synced = padding + sync

    synced.export("synced-track%d.wav" % filenumber, format="wav")