endolith/frequency_estimator.py

## readme.txt
A few simple frequency estimation methods in Python

These are the methods that everyone recommends when someone asks about
frequency estimation or pitch detection.  (Such as here:
http://stackoverflow.com/questions/65268/music-how-do-you-analyse-the-fundamental-frequency-of-a-pcm-or-wac-sample/)

None of them work well in all situations, and I am sure there are much better
methods "in the literature", but here is some code for the simple methods.

* Count zero-crossings
 * Using interpolation to find a "truer" zero-crossing gives better accuracy
* Do FFT and find the peak
 * Using interpolation to find a "truer" peak gives better accuracy
* Do autocorrelation and find the peak
* Calculate harmonic product spectrum and find the peak

## frequency_estimator.py
from __future__ import division
from scikits.audiolab import flacread
from numpy.fft import rfft, irfft
from numpy import argmax, sqrt, mean, diff, log
from matplotlib.mlab import find
from scipy.signal import blackmanharris, fftconvolve
from time import time
import sys

# Faster version from http://projects.scipy.org/scipy/browser/trunk/scipy/signal/signaltools.py
# from signaltoolsmod import fftconvolve
from parabolic import parabolic

def freq_from_crossings(sig, fs):
    """Estimate frequency by counting zero crossings

    Pros: Fast, accurate (increasing with signal length).  Works well for long
    low-noise sines, square, triangle, etc.

    Cons: Doesn't work if there are multiple zero crossings per cycle,
    low-frequency baseline shift, noise, etc.

    """
    # Find all indices right before a rising-edge zero crossing
    indices = find((sig[1:] >= 0) & (sig[:-1] < 0))

    # Naive (Measures 1000.185 Hz for 1000 Hz, for instance)
    #crossings = indices

    # More accurate, using linear interpolation to find intersample
    # zero-crossings (Measures 1000.000129 Hz for 1000 Hz, for instance)
    crossings = [i - sig[i] / (sig[i+1] - sig[i]) for i in indices]

    # Some other interpolation based on neighboring points might be better. Spline, cubic, whatever

    return fs / mean(diff(crossings))

def freq_from_fft(sig, fs):
    """Estimate frequency from peak of FFT

    Pros: Accurate, usually even more so than zero crossing counter
    (1000.000004 Hz for 1000 Hz, for instance).  Due to parabolic interpolation
    being a very good fit for windowed log FFT peaks?
    https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
    Accuracy also increases with signal length

    Cons: Doesn't find the right value if harmonics are stronger than
    fundamental, which is common.

    """
    # Compute Fourier transform of windowed signal
    windowed = signal * blackmanharris(len(signal))
    f = rfft(windowed)

    # Find the peak and interpolate to get a more accurate peak
    i = argmax(abs(f)) # Just use this for less-accurate, naive version
    true_i = parabolic(log(abs(f)), i)[0]

    # Convert to equivalent frequency
    return fs * true_i / len(windowed)

def freq_from_autocorr(sig, fs):
    """Estimate frequency using autocorrelation

    Pros: Best method for finding the true fundamental of any repeating wave,
    even with strong harmonics or completely missing fundamental

    Cons: Not as accurate, currently has trouble with finding the true peak

    """
    # Calculate circular autocorrelation (same thing as convolution, but with
    # one input reversed in time), and throw away the negative lags
    corr = fftconvolve(sig, sig[::-1], mode='full')
    corr = corr[len(corr)/2:]

    # Find the first low point
    d = diff(corr)
    start = find(d > 0)[0]

    # Find the next peak after the low point (other than 0 lag).  This bit is
    # not reliable for long signals, due to the desired peak occurring between
    # samples, and other peaks appearing higher.
    # Should use a weighting function to de-emphasize the peaks at longer lags.
    # Also could zero-pad before doing circular autocorrelation.
    peak = argmax(corr[start:]) + start
    px, py = parabolic(corr, peak)

    return fs / px

filename = sys.argv[1]

print 'Reading file "%s"\n' % filename
signal, fs, enc = flacread(filename)

print 'Calculating frequency from FFT:',
start_time = time()
print '%f Hz'   % freq_from_fft(signal, fs)
print 'Time elapsed: %.3f s\n' % (time() - start_time)

print 'Calculating frequency from zero crossings:',
start_time = time()
print '%f Hz' % freq_from_crossings(signal, fs)
print 'Time elapsed: %.3f s\n' % (time() - start_time)

print 'Calculating frequency from autocorrelation:',
start_time = time()
print '%f Hz' % freq_from_autocorr(signal, fs)
print 'Time elapsed: %.3f s\n' % (time() - start_time)

## parabolic.py
from __future__ import division

def parabolic(f, x):
    """Quadratic interpolation for estimating the true position of an
    inter-sample maximum when nearby samples are known.

    f is a vector and x is an index for that vector.

    Returns (vx, vy), the coordinates of the vertex of a parabola that goes
    through point x and its two neighbors.

    Example:
    Defining a vector f with a local maximum at index 3 (= 6), find local
    maximum if points 2, 3, and 4 actually defined a parabola.

    In [3]: f = [2, 3, 1, 6, 4, 2, 3, 1]

    In [4]: parabolic(f, argmax(f))
    Out[4]: (3.2142857142857144, 6.1607142857142856)

    """
    # Requires real division.  Insert float() somewhere to force it?
    xv = 1/2 * (f[x-1] - f[x+1]) / (f[x-1] - 2 * f[x] + f[x+1]) + x
    yv = f[x] - 1/4 * (f[x-1] - f[x+1]) * (xv - x)
    return (xv, yv)

## parabolic.txt
"Note that the Gaussian window transform magnitude is precisely a parabola on a dB scale. As a result, quadratic spectral peak interpolation is *exact* under the Gaussian window. Of course, we must somehow remove the infinitely long tails of the Gaussian window in practice, but this does not cause much deviation from a parabola"

https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html


Actually this can be done by polyfit function, too, but requires more steps:

f = [2, 3, 1, 6, 4, 2, 3, 1]

In [8]: parabolic(f, argmax(f))
Out[8]: (3.2142857142857144, 6.1607142857142856)

In [9]: a, b, c = polyfit([2,3,4],[1,6,4],2)
In [10]: x = -0.5*b/a

In [11]: x
Out[11]: 3.2142857142857695

In [12]: a*x**2 + b*x + c
Out[12]: 6.1607142857143025

parabolic() should be updated to use this, and then it will work for non-uniform sampled input as well.


Alternative estimators with even lower error: http://www.dspguru.com/dsp/howtos/how-to-interpolate-fft-peak
	A few simple frequency estimation methods in Python

	These are the methods that everyone recommends when someone asks about
	frequency estimation or pitch detection. (Such as here:
	http://stackoverflow.com/questions/65268/music-how-do-you-analyse-the-fundamental-frequency-of-a-pcm-or-wac-sample/)

	None of them work well in all situations, and I am sure there are much better
	methods "in the literature", but here is some code for the simple methods.

	* Count zero-crossings
	* Using interpolation to find a "truer" zero-crossing gives better accuracy
	* Do FFT and find the peak
	* Using interpolation to find a "truer" peak gives better accuracy
	* Do autocorrelation and find the peak
	* Calculate harmonic product spectrum and find the peak
	from __future__ import division
	from scikits.audiolab import flacread
	from numpy.fft import rfft, irfft
	from numpy import argmax, sqrt, mean, diff, log
	from matplotlib.mlab import find
	from scipy.signal import blackmanharris, fftconvolve
	from time import time
	import sys

	# Faster version from http://projects.scipy.org/scipy/browser/trunk/scipy/signal/signaltools.py
	# from signaltoolsmod import fftconvolve
	from parabolic import parabolic

	def freq_from_crossings(sig, fs):
	"""Estimate frequency by counting zero crossings

	Pros: Fast, accurate (increasing with signal length). Works well for long
	low-noise sines, square, triangle, etc.

	Cons: Doesn't work if there are multiple zero crossings per cycle,
	low-frequency baseline shift, noise, etc.

	"""
	# Find all indices right before a rising-edge zero crossing
	indices = find((sig[1:] >= 0) & (sig[:-1] < 0))

	# Naive (Measures 1000.185 Hz for 1000 Hz, for instance)
	#crossings = indices

	# More accurate, using linear interpolation to find intersample
	# zero-crossings (Measures 1000.000129 Hz for 1000 Hz, for instance)
	crossings = [i - sig[i] / (sig[i+1] - sig[i]) for i in indices]

	# Some other interpolation based on neighboring points might be better. Spline, cubic, whatever

	return fs / mean(diff(crossings))

	def freq_from_fft(sig, fs):
	"""Estimate frequency from peak of FFT

	Pros: Accurate, usually even more so than zero crossing counter
	(1000.000004 Hz for 1000 Hz, for instance). Due to parabolic interpolation
	being a very good fit for windowed log FFT peaks?
	https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
	Accuracy also increases with signal length

	Cons: Doesn't find the right value if harmonics are stronger than
	fundamental, which is common.

	"""
	# Compute Fourier transform of windowed signal
	windowed = signal * blackmanharris(len(signal))
	f = rfft(windowed)

	# Find the peak and interpolate to get a more accurate peak
	i = argmax(abs(f)) # Just use this for less-accurate, naive version
	true_i = parabolic(log(abs(f)), i)[0]

	# Convert to equivalent frequency
	return fs * true_i / len(windowed)

	def freq_from_autocorr(sig, fs):
	"""Estimate frequency using autocorrelation

	Pros: Best method for finding the true fundamental of any repeating wave,
	even with strong harmonics or completely missing fundamental

	Cons: Not as accurate, currently has trouble with finding the true peak

	"""
	# Calculate circular autocorrelation (same thing as convolution, but with
	# one input reversed in time), and throw away the negative lags
	corr = fftconvolve(sig, sig[::-1], mode='full')
	corr = corr[len(corr)/2:]

	# Find the first low point
	d = diff(corr)
	start = find(d > 0)[0]

	# Find the next peak after the low point (other than 0 lag). This bit is
	# not reliable for long signals, due to the desired peak occurring between
	# samples, and other peaks appearing higher.
	# Should use a weighting function to de-emphasize the peaks at longer lags.
	# Also could zero-pad before doing circular autocorrelation.
	peak = argmax(corr[start:]) + start
	px, py = parabolic(corr, peak)

	return fs / px

	filename = sys.argv[1]

	print 'Reading file "%s"\n' % filename
	signal, fs, enc = flacread(filename)

	print 'Calculating frequency from FFT:',
	start_time = time()
	print '%f Hz' % freq_from_fft(signal, fs)
	print 'Time elapsed: %.3f s\n' % (time() - start_time)

	print 'Calculating frequency from zero crossings:',
	start_time = time()
	print '%f Hz' % freq_from_crossings(signal, fs)
	print 'Time elapsed: %.3f s\n' % (time() - start_time)

	print 'Calculating frequency from autocorrelation:',
	start_time = time()
	print '%f Hz' % freq_from_autocorr(signal, fs)
	print 'Time elapsed: %.3f s\n' % (time() - start_time)
	from __future__ import division

	def parabolic(f, x):
	"""Quadratic interpolation for estimating the true position of an
	inter-sample maximum when nearby samples are known.

	f is a vector and x is an index for that vector.

	Returns (vx, vy), the coordinates of the vertex of a parabola that goes
	through point x and its two neighbors.

	Example:
	Defining a vector f with a local maximum at index 3 (= 6), find local
	maximum if points 2, 3, and 4 actually defined a parabola.

	In [3]: f = [2, 3, 1, 6, 4, 2, 3, 1]

	In [4]: parabolic(f, argmax(f))
	Out[4]: (3.2142857142857144, 6.1607142857142856)

	"""
	# Requires real division. Insert float() somewhere to force it?
	xv = 1/2 * (f[x-1] - f[x+1]) / (f[x-1] - 2 * f[x] + f[x+1]) + x
	yv = f[x] - 1/4 * (f[x-1] - f[x+1]) * (xv - x)
	return (xv, yv)
	"Note that the Gaussian window transform magnitude is precisely a parabola on a dB scale. As a result, quadratic spectral peak interpolation is exact under the Gaussian window. Of course, we must somehow remove the infinitely long tails of the Gaussian window in practice, but this does not cause much deviation from a parabola"

	https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html


	Actually this can be done by polyfit function, too, but requires more steps:

	f = [2, 3, 1, 6, 4, 2, 3, 1]

	In [8]: parabolic(f, argmax(f))
	Out[8]: (3.2142857142857144, 6.1607142857142856)

	In [9]: a, b, c = polyfit([2,3,4],[1,6,4],2)
	In [10]: x = -0.5*b/a

	In [11]: x
	Out[11]: 3.2142857142857695

	In [12]: ax2 + bx + c
	Out[12]: 6.1607142857143025

	parabolic() should be updated to use this, and then it will work for non-uniform sampled input as well.


	Alternative estimators with even lower error: http://www.dspguru.com/dsp/howtos/how-to-interpolate-fft-peak