Skip to content

Instantly share code, notes, and snippets.

@andres-fr
Last active June 8, 2019 13:22
Show Gist options
  • Save andres-fr/4b253f744a0c7052d6cf8a66d9e504e7 to your computer and use it in GitHub Desktop.
Save andres-fr/4b253f744a0c7052d6cf8a66d9e504e7 to your computer and use it in GitHub Desktop.
Given 2 mono wavs, finds out if one is a chunk of the other and returns the positions with sample precision. Includes a test.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Given the paths to 2 wav audio files, this script tells whether one is a
contiguous chunk of the other, and in which sample range.
Usage help:
python <THIS_SCRIPT> -h
"""
import argparse
import soundfile as sf
__author__ = "Andres FR"
# #############################################################################
# ## HELPERS
# #############################################################################
def numpy_find_sublist(arr1, arr2):
    """
    Find out whether one 1D array is a contiguous chunk of the other.

    The comparison is done on the raw bytes of both arrays, so two elements
    only match if they are bit-identical.

    :param ndarray arr1: 1D numpy array
    :param ndarray arr2: 1D numpy array of same dtype as arr1 (e.g. np.float32)
    :returns: a tuple (A, B) where A is the tuple (beg, end) of the first
      range such that arr2[beg:end] equals arr1, and None if there is no such
      range. B works the same with the roles swapped: arr1[beg:end] equals
      arr2.
    :raises AssertionError: if the dtypes differ or any array is not 1D.
    """
    assert arr1.dtype == arr2.dtype, "arrays must have same dtype!"
    assert len(arr1.shape) == len(arr2.shape) == 1, "arrays must be 1D!"
    elem_bytes = arr1.itemsize  # same for both, since the dtypes are equal
    bytes1, bytes2 = arr1.tobytes(), arr2.tobytes()

    def _aligned_range(needle, haystack, needle_len):
        """Return (beg, end) of the first element-aligned match, or None."""
        pos = haystack.find(needle)
        # A byte-level hit may start in the middle of an element. Such a hit
        # is not a real match, so keep searching until the offset falls on
        # an element boundary or the haystack is exhausted.
        while pos >= 0 and pos % elem_bytes != 0:
            pos = haystack.find(needle, pos + 1)
        if pos < 0:
            return None
        beg = pos // elem_bytes
        return (beg, beg + needle_len)

    # is arr1 a chunk of arr2?
    result1 = _aligned_range(bytes1, bytes2, arr1.shape[0])
    # is arr2 a chunk of arr1?
    result2 = _aligned_range(bytes2, bytes1, arr2.shape[0])
    return (result1, result2)
def test_numpy_find_sublists(n=2000, arrsize=100000):
    """
    Randomized self-test for ``numpy_find_sublist``.

    Creates a random array of ``arrsize`` ``n`` times. Each time, a
    random subarray is extracted, and both are passed in random order to
    numpy_find_sublist. If the function doesn't detect the correct relation
    for a given iteration, an AssertionError will be raised. Otherwise,
    prints success and returns True.

    :param int n: number of random iterations.
    :param int arrsize: length of the random array created per iteration.
    :returns: True if every iteration passed.
    :raises AssertionError: as soon as one iteration yields a wrong result.
    """
    import numpy as np
    for _ in range(n):
        test_arr = np.random.rand(arrsize)
        # randint excludes the upper bound, so the chunk is always strictly
        # shorter than the full array and the reverse lookup must be None
        test_beg, test_end = sorted(np.random.randint(0, arrsize, 2))
        test_subarr = test_arr[test_beg:test_end].copy()
        # When both bounds coincide the chunk is empty, and an empty chunk
        # is first found at offset 0 regardless of where it was cut from
        if test_beg == test_end:
            expected = (0, 0)
        else:
            expected = (test_beg, test_end)
        # test for subarr both ways
        if test_beg % 2 == 0:  # flip a coin
            res1, res2 = numpy_find_sublist(test_subarr, test_arr)
            assert res2 is None, "test failed! 1"
            assert res1 == expected, "test failed! 2"
        else:
            res1, res2 = numpy_find_sublist(test_arr, test_subarr)
            assert res1 is None, "test failed! 3"
            assert res2 == expected, "test failed! 4"
    print("test passed!")
    return True
# #############################################################################
# ## MAIN ROUTINE
# #############################################################################
def main():
    """
    Command-line entry point.

    1. read arguments (check with -h)
    2. if test was requested: test. If fails, raise exception.
    3. load wavs. If any is not mono, exit with an error. If samplerates
       are different: print warning
    4. call subset finder subroutine and gather results
    5. Human-friendly report of results
    """
    # parse arguments from command line:
    parser = argparse.ArgumentParser(
        description="Tells whether one of two mono WAV files is a chunk of "
        "the other, and in which sample range")
    parser.add_argument("-w1", "--wav_path1", help="absolute path",
                        required=True)
    parser.add_argument("-w2", "--wav_path2", help="absolute path",
                        required=True)
    parser.add_argument("-T", "--test", help="call test function before start",
                        action="store_true")
    args = parser.parse_args()
    #
    if args.test:
        # Call the test outside of an ``assert`` statement: asserts are
        # stripped under ``python -O``, which would silently skip the call
        if not test_numpy_find_sublists(1000):
            raise AssertionError("test failed!")
    # load wavs
    wav1, sr1 = sf.read(args.wav_path1)
    wav2, sr2 = sf.read(args.wav_path2)
    # the finder only accepts 1D arrays, so reject anything else up front
    # with a readable message instead of a bare assertion failure
    for path, wav in ((args.wav_path1, wav1), (args.wav_path2, wav2)):
        if len(wav.shape) != 1:
            parser.error("%s is not a mono file (array shape: %s)" % (
                path, str(wav.shape)))
    if sr1 != sr2:
        print("WARNING: audios have different samplerates:", (sr1, sr2))
    # compare wavs and print results
    result1, result2 = numpy_find_sublist(wav1, wav2)
    print("length(wav1) = %d, length(wav2) = %d, samplerates = %s" % (
        wav1.shape[0], wav2.shape[0], str((sr1, sr2))))
    if result1 is not None:
        print("wav1 is part of wav2 in the following wav2 range:", result1)
    if result2 is not None:
        print("wav2 is part of wav1 in the following wav1 range:", result2)
    if result1 is None and result2 is None:
        print("no matches found")
# Run the command-line routine only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment