Last active
June 8, 2019 13:22
-
-
Save andres-fr/4b253f744a0c7052d6cf8a66d9e504e7 to your computer and use it in GitHub Desktop.
Given 2 mono wavs, finds out if one is a chunk of the other and returns the positions with sample precision. Includes a test.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
""" | |
Given the paths to 2 mono wav audio files, this script tells whether one is a
contiguous chunk of the other, and in which sample range.
Usage help: | |
python <THIS_SCRIPT> -h | |
""" | |
import argparse | |
import soundfile as sf | |
__author__ = "Andres FR" | |
# ############################################################################# | |
# ## HELPERS | |
# ############################################################################# | |
def _find_aligned_offset(needle, haystack, elem_bytes):
    """
    Return the element index of the first occurrence of the byte string
    ``needle`` in ``haystack`` that starts on an element boundary (i.e. at a
    byte offset that is a multiple of ``elem_bytes``), or None if there is no
    such occurrence.
    """
    pos = haystack.find(needle)
    while pos != -1:
        if pos % elem_bytes == 0:
            return pos // elem_bytes
        # A match that straddles two elements is a coincidence of the byte
        # representation, not a real sub-array: keep searching after it.
        pos = haystack.find(needle, pos + 1)
    return None


def numpy_find_sublist(arr1, arr2):
    """
    Find out whether one 1D array is a contiguous chunk of the other.

    The comparison is done on the raw bytes of both arrays, so elements are
    matched bit-by-bit (e.g. for floats, 0.0 and -0.0 are considered
    different). Only matches that begin on an element boundary are reported.

    :param ndarray arr1: 1D numpy array
    :param ndarray arr2: 1D numpy array of same dtype as arr1 (e.g. np.float32)
    :returns: a tuple (A, B) where A is the tuple (beg, end) if
      arr2[beg:end]==arr1, and None otherwise. B works the same the other way
      around: (beg, end) if arr1[beg:end]==arr2, and None otherwise. When
      several matches exist, the first one is returned.
    :raises AssertionError: if the dtypes differ or any array is not 1D.
    """
    assert arr1.dtype == arr2.dtype, "arrays must have same dtype!"
    assert len(arr1.shape) == len(arr2.shape) == 1, "arrays must be 1D!"
    elem_bytes = arr1.itemsize  # same for both, since the dtypes are equal
    # tobytes() replaces the deprecated tostring() alias
    bytes1, bytes2 = arr1.tobytes(), arr2.tobytes()
    len1, len2 = arr1.shape[0], arr2.shape[0]
    result1, result2 = None, None
    # find if arr1 is a sublist of arr2
    beg1 = _find_aligned_offset(bytes1, bytes2, elem_bytes)
    if beg1 is not None:
        result1 = (beg1, beg1 + len1)
    # find if arr2 is a sublist of arr1
    beg2 = _find_aligned_offset(bytes2, bytes1, elem_bytes)
    if beg2 is not None:
        result2 = (beg2, beg2 + len2)
    return (result1, result2)
def test_numpy_find_sublists(n=2000, arrsize=100000, seed=None):
    """
    Creates a random array of ``arrsize`` ``n`` times. Each time, a random
    non-empty, proper subarray is extracted, and both are passed in random
    order to numpy_find_sublist. If the function doesn't detect the correct
    relation for a given iteration, an AssertionError will be raised.
    Otherwise, prints success and returns True.

    :param int n: number of random trials.
    :param int arrsize: length of the random arrays, must be at least 2 so
      that a non-empty subarray shorter than the array exists.
    :param seed: optional seed for the random generator, to make a run
      reproducible. If None, the run is not reproducible.
    :returns: True if all trials passed.
    :raises ValueError: if ``arrsize`` is smaller than 2.
    :raises AssertionError: if any trial reports a wrong relation.
    """
    import numpy as np  # local import: only the test needs numpy directly

    if arrsize < 2:
        raise ValueError("arrsize must be >= 2 to extract a proper subarray")
    rng = np.random.RandomState(seed)
    for _ in range(n):
        test_arr = rng.rand(arrsize)
        # randint's upper bound is exclusive: the length is in
        # [1, arrsize - 1]. An empty subarray would trivially match at
        # (0, 0), and a full-length one would match in both directions, so
        # both are avoided.
        sub_len = rng.randint(1, arrsize)
        test_beg = rng.randint(0, arrsize - sub_len + 1)
        test_end = test_beg + sub_len
        expected = (test_beg, test_end)
        test_subarr = test_arr[test_beg:test_end].copy()
        # test for subarr both ways
        if rng.rand() < 0.5:  # flip a coin
            res1, res2 = numpy_find_sublist(test_subarr, test_arr)
            assert res2 is None, "test failed! 1"
            assert res1 == expected, "test failed! 2"
        else:
            res1, res2 = numpy_find_sublist(test_arr, test_subarr)
            assert res1 is None, "test failed! 3"
            assert res2 == expected, "test failed! 4"
    print("test passed!")
    return True
# ############################################################################# | |
# ## MAIN ROUTINE | |
# ############################################################################# | |
def main():
    """
    1. read arguments (check with -h)
    2. if test was requested: test. If fails, raise exception.
    3. load wavs. If any is not mono: exit with an error message.
       If samplerates are different: print warning
    4. call subset finder subroutine and gather results
    5. Human-friendly report of results
    """
    # parse arguments from command line:
    parser = argparse.ArgumentParser(
        description="Find out whether one mono WAV file is a chunk of "
        "another, with sample precision")
    parser.add_argument("-w1", "--wav_path1", help="absolute path",
                        required=True)
    parser.add_argument("-w2", "--wav_path2", help="absolute path",
                        required=True)
    parser.add_argument("-T", "--test", help="call test function before start",
                        action="store_true")
    args = parser.parse_args()
    wav_path1 = args.wav_path1
    wav_path2 = args.wav_path2
    #
    if args.test:
        # Call the test explicitly instead of inside an ``assert``: assert
        # statements are removed under ``python -O``, which would silently
        # skip the requested test.
        if not test_numpy_find_sublists(1000):
            raise AssertionError("test failed!")
    # load wavs
    wav1, sr1 = sf.read(wav_path1)
    wav2, sr2 = sf.read(wav_path2)
    # numpy_find_sublist only accepts 1D arrays; report anything else (e.g.
    # multi-channel audio) as a usage error instead of a bare assertion.
    for path, wav in ((wav_path1, wav1), (wav_path2, wav2)):
        if wav.ndim != 1:
            parser.error("only mono files are supported, but %s has shape %s"
                         % (path, str(wav.shape)))
    if sr1 != sr2:
        print("WARNING: audios have different samplerates:", (sr1, sr2))
    # compare wavs and print results
    result1, result2 = numpy_find_sublist(wav1, wav2)
    print("length(wav1) = %d, length(wav2) = %d, samplerates = %s" % (
        wav1.shape[0], wav2.shape[0], str((sr1, sr2))))
    if result1 is not None:
        print("wav1 is part of wav2 in the following wav2 range:", result1)
    if result2 is not None:
        print("wav2 is part of wav1 in the following wav1 range:", result2)
    if result1 is None and result2 is None:
        print("no matches found")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment