Skip to content

Instantly share code, notes, and snippets.

@shivama205
Created March 20, 2017 14:19
Show Gist options
  • Save shivama205/5578f999a9c88112f5d042ebb83e54d5 to your computer and use it in GitHub Desktop.
Save shivama205/5578f999a9c88112f5d042ebb83e54d5 to your computer and use it in GitHub Desktop.
# compare.py
import argparse
from correlation import correlate
def initialize(argv=None):
    """Parse the command line and return the two audio file paths.

    Args:
        argv: Optional list of argument strings to parse. When omitted,
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        A ``(source_file, target_file)`` tuple holding the values given
        with ``-i/--source-file`` and ``-o/--target-file``.

    Raises:
        Exception: If either file is missing or given as an empty string.
    """
    parser = argparse.ArgumentParser()
    # The short flags used to be registered with a trailing space
    # ("-i " / "-o "), which only matched a plain "-i" through argparse's
    # prefix abbreviation. Register the exact flags instead.
    parser.add_argument("-i", "--source-file", help="source file")
    parser.add_argument("-o", "--target-file", help="target file")
    args = parser.parse_args(argv)

    if not args.source_file or not args.target_file:
        raise Exception("Source or Target files not specified.")
    return args.source_file, args.target_file
if __name__ == "__main__":
    # Script entry point: read both paths from the command line, then
    # hand them to the correlation routine.
    source_path, target_path = initialize()
    correlate(source_path, target_path)
# correlation.py
import commands
import numpy
# seconds of audio to fingerprint; passed to `fpcalc -length`
sample_time = 500
# largest offset (in fingerprint points) scanned on either side of zero;
# compare() tries every offset from -span to +span
span = 150
# step size (in points) between successive cross correlation offsets
step = 1
# minimum number of points that must overlap in cross correlation;
# cross_correlation() returns None for an offset that cannot meet this
min_overlap = 20
# report match when cross correlation has a peak exceeding threshold
threshold = 0.5
# calculate fingerprint
def calculate_fingerprints(filename):
    """Return the raw fingerprint of an audio file as a list of integers.

    Runs the external ``fpcalc`` command as
    ``fpcalc -raw -length <sample_time> <filename>`` and parses the
    comma-separated values on the ``FINGERPRINT=`` line of its output.

    Args:
        filename: Path of the audio file to fingerprint.

    Returns:
        A list of ints, one per fingerprint value.

    Raises:
        Exception: If ``fpcalc`` cannot be run, exits with an error, or
            prints no (or an empty) ``FINGERPRINT=`` line.
    """
    # Local import: the file header only imports the Python 2-only
    # `commands` module, which this function no longer relies on.
    import subprocess

    marker = 'FINGERPRINT='
    try:
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact.
        fpcalc_out = subprocess.check_output(
            ['fpcalc', '-raw', '-length', str(sample_time), filename],
            universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise Exception('fpcalc failed for %s: %s' % (filename, err))

    for line in fpcalc_out.splitlines():
        if line.startswith(marker):
            raw_values = line[len(marker):].strip()
            break
    else:
        # Previously a missing marker was ignored and arbitrary output was
        # fed to int(), giving a confusing "invalid literal" error.
        raise Exception('fpcalc produced no fingerprint for %s' % filename)

    if not raw_values:
        raise Exception('fpcalc produced an empty fingerprint for %s'
                        % filename)

    # A real list (not a lazy map object) so callers can use len() and
    # slicing on Python 3 as well.
    return [int(value) for value in raw_values.split(',')]
# returns correlation between lists
def correlation(listx, listy):
    """Return the fraction of matching bits between two fingerprint lists.

    The longer list is truncated to the length of the shorter one. Each
    pair of values is compared as 32-bit words: the number of equal bits
    is summed over all pairs and normalised to the range 0.0 - 1.0.

    Args:
        listx: Sequence of integer fingerprint values.
        listy: Sequence of integer fingerprint values.

    Returns:
        A float between 0.0 (every bit differs) and 1.0 (identical).

    Raises:
        Exception: If either list is empty.
    """
    if len(listx) == 0 or len(listy) == 0:
        raise Exception('Empty lists cannot be correlated.')

    overlap = min(len(listx), len(listy))
    # zip() stops at the shorter list, which is the same truncation.
    # Masking to 32 bits keeps the bit count correct for negative (signed)
    # values: bin() of a negative int is '-0b...' and would undercount.
    matching_bits = sum(
        32 - bin((x ^ y) & 0xFFFFFFFF).count('1')
        for x, y in zip(listx, listy))
    return matching_bits / float(overlap) / 32
# return cross correlation, with listy offset from listx
def cross_correlation(listx, listy, offset):
    """Correlate listx and listy after shifting one against the other.

    A positive offset drops the first `offset` points of listx; a negative
    offset drops the first `-offset` points of listy. The other list is
    then cut to the length of the shifted one.

    Returns the value of correlation() for the shifted lists, or None when
    fewer than `min_overlap` points remain to compare.
    """
    if offset > 0:
        shifted_x = listx[offset:]
        shifted_y = listy[:len(shifted_x)]
    elif offset < 0:
        shifted_y = listy[-offset:]
        shifted_x = listx[:len(shifted_y)]
    else:
        shifted_x, shifted_y = listx, listy

    overlap = min(len(shifted_x), len(shifted_y))
    if overlap >= min_overlap:
        return correlation(shifted_x, shifted_y)
    # Too little overlap: no exception is raised, the offset yields None.
    return None
# cross correlate listx and listy with offsets from -span to span
def compare(listx, listy, span, step):
    """Return the cross correlation at every offset in [-span, span].

    Offsets advance by `step`. The result holds one cross_correlation()
    value per offset, in increasing offset order.

    Raises an Exception when `span` is larger than the shorter list.
    """
    shortest = min(len(listx), len(listy))
    if span > shortest:
        raise Exception('span >= sample size: %i >= %i\n'
                        % (span, shortest)
                        + 'Reduce span, reduce crop or increase sample_time.')
    return [cross_correlation(listx, listy, offset)
            for offset in numpy.arange(-span, span + 1, step)]
# return index of maximum value in list
def max_index(listx):
    """Return the index of the largest value in listx.

    None entries are skipped, so a list mixing numbers and None can be
    scanned on Python 3, where comparing None with a number raises
    TypeError. On ties the first occurrence wins. If every entry is None
    the result is 0.

    Args:
        listx: Non-empty sequence of numbers and/or None.

    Returns:
        The integer index of the maximum value.

    Raises:
        IndexError: If listx is empty.
    """
    best_index = 0
    best_value = listx[0]
    for i, value in enumerate(listx):
        if value is None:
            continue
        if best_value is None or value > best_value:
            best_value = value
            best_index = i
    return best_index
def get_max_corr(corr, source, target):
    """Report the strongest cross correlation and return its offset.

    Finds the peak of `corr` with max_index(), converts its index to an
    offset using the module-level `span` and `step`, prints both, and
    prints a match line when the peak exceeds the module-level
    `threshold`.

    Args:
        corr: Cross correlation values as returned by compare().
        source: Name of the source file, used only in the match message.
        target: Name of the target file, used only in the match message.

    Returns:
        The offset of the peak, i.e. ``-span + index * step``.
    """
    max_corr_index = max_index(corr)
    max_corr_offset = -span + max_corr_index * step
    # One pre-formatted string prints the same text whether `print` is the
    # Python 2 statement or the Python 3 function.
    print('max_corr_index =  %s max_corr_offset =  %s'
          % (max_corr_index, max_corr_offset))
    # report matches; the peak is None when no offset had enough overlap
    peak = corr[max_corr_index]
    if peak is not None and peak > threshold:
        print('%s and %s match with correlation of %.4f at offset %i'
              % (source, target, peak, max_corr_offset))
    return max_corr_offset
def correlate(source, target):
    """Fingerprint two audio files and report how well they correlate.

    Args:
        source: Path of the source audio file.
        target: Path of the target audio file.

    Returns:
        Whatever get_max_corr() returns for the computed cross
        correlation.
    """
    # list() guarantees len() and slicing work even if the fingerprints
    # arrive as a lazy iterable.
    fingerprint_source = list(calculate_fingerprints(source))
    fingerprint_target = list(calculate_fingerprints(target))
    corr = compare(fingerprint_source, fingerprint_target, span, step)
    return get_max_corr(corr, source, target)
@vin-the-dev
Copy link

It doesn't seem to work; I am getting the error below.

invalid literal for int() with base 10: 'command not found'

@JannemanDev
Copy link

JannemanDev commented Jun 2, 2019

Doesn't work. Print on line 87 gives syntax error.
Also this doesn't compile in Python 3:
-Commands module is deprecated. Use subprocess module instead.
-Also numpy module must be installed: pip3 install numpy
-ton of list and map errors

Was it originally written for Python 2?
On Python 2 I get:
Traceback (most recent call last):
File "C:\Temp\ffmpeg\bin\compare.py", line 19, in
correlate(SOURCE_FILE, TARGET_FILE)
File "C:\Temp\ffmpeg\bin\correlation.py", line 94, in correlate
fingerprint_source = calculate_fingerprints(source)
File "C:\Temp\ffmpeg\bin\correlation.py", line 23, in calculate_fingerprints
fingerprints = map(int, fpcalc_out[fingerprint_index:].split(','))
ValueError: invalid literal for int() with base 10: 'recognized as an internal or external command'

@JannemanDev
Copy link

Got it working with Python 2.7:
-Commands module is deprecated. Use subprocess module instead.
-Also change commands.getoutput to subprocess.check_output

@adon988
Copy link

adon988 commented Jul 25, 2019

Environment:

  • Mac
  • python 2.7.10
a.wav           
b.wav           
compare.py      
correlation.py  

compare.py

  1 # compare.py
  2 import argparse
  3 from correlation import correlate
  4
  5 def initialize():
  6     parser = argparse.ArgumentParser()
  7     parser.add_argument("-i ", "--a.wav", help="source file")
  8     parser.add_argument("-o ", "--b.wav", help="target file")
  9     args = parser.parse_args()
 10
 11     SOURCE_FILE = args.source_file if args.source_file else None
 12     TARGET_FILE = args.target_file if args.target_file else None
 13     if not SOURCE_FILE or not TARGET_FILE:
 14       raise Exception("Source or Target files not specified.")
 15     return SOURCE_FILE, TARGET_FILE
 16
 17 if __name__ == "__main__":
 18     SOURCE_FILE, TARGET_FILE = initialize()
 19     correlate(SOURCE_FILE, TARGET_FILE)

Get error:

Traceback (most recent call last):
  File "compare.py", line 18, in <module>
    SOURCE_FILE, TARGET_FILE = initialize()
  File "compare.py", line 11, in initialize
    SOURCE_FILE = args.source_file if args.source_file else None
AttributeError: 'Namespace' object has no attribute 'source_file'

@pranavikomm
Copy link

Environment : python 3.8 , windows 10
Got this error for above code

Traceback (most recent call last):
File "compare.py", line 21, in
correlate(SOURCE_FILE, TARGET_FILE)
File "E:\Test_scripts\TTS\correlation.py", line 103, in correlate
corr = compare(fingerprint_source, fingerprint_target, span, step)
File "E:\Test_scripts\TTS\correlation.py", line 66, in compare
if span > min(len(listx), len(listy)):
TypeError: object of type 'map' has no len()

@chamodi123
Copy link

How to run this?

@gazala-ansari
Copy link

How do I run this code in Python 3?

@kdave
Copy link

kdave commented Oct 7, 2021

Useful scripts, thanks, here's the ported version https://github.com/kdave/audio-compare .

@Madhushika97
Copy link

How can I assign the source and target files?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment