jboone/goertzel.py

## goertzel.py
#!/usr/bin/env python3

import sys
import wave

import numpy
import scipy.signal

def detect_tone(x, f, fs, step_size, n=200):
	"""Measure how strongly frequency `f` is present in `x` over a sliding window.

	Each window is correlated against a complex exponential at `f`. This is
	the single-bin DFT magnitude that the Goertzel algorithm produces,
	computed here as a direct dot product rather than with the recursive
	Goertzel filter.

	Parameters:
		x: 1-D sequence of samples (must support len() and slicing).
		f: Frequency to detect, in the same units as `fs`.
		fs: Sampling rate of `x`.
		step_size: Number of samples to advance between successive windows.
		n: Window length in samples. Defaults to 200, the value that used
			to be hard-coded here.

	Returns:
		A float32 array of length `len(x) // step_size`. Element `i` is the
		magnitude for the window starting at sample `i * step_size`. A
		unit-amplitude sinusoid at `f` spanning a whole number of periods
		yields 0.5. Trailing positions where fewer than `n` samples remain
		are left at zero.

	Raises:
		ValueError: If `n` is smaller than 1.
	"""
	if n < 1:
		raise ValueError('window length n must be at least 1')

	# Correlation kernel: a complex exponential at f, scaled by 1/n so the
	# magnitude does not grow with the window length.
	w0 = 2 * numpy.pi * f / fs
	coeffs = numpy.exp(
		numpy.arange(n, dtype=numpy.complex64) * w0 * -1j
	) / n

	# One output value per step; entries never written stay at zero.
	output_len = len(x) // step_size
	result = numpy.zeros((output_len,), dtype=numpy.float32)

	for i in range(output_len):
		start = i * step_size
		chunk = x[start:start + n]
		if len(chunk) != n:
			# The window ran past the end of the input, and so will every
			# later one; their outputs keep the zero they were created with.
			break
		result[i] = numpy.abs(numpy.dot(coeffs, chunk))

	return result

# Expected WAV file sampling rate.
fs = 8000

# Time resolution of our detections. We do a detection at every `step_size` samples.
step_size = 20

# Read the WAV file named by the first command-line argument. The context
# manager closes the file as soon as the raw frames are in memory.
with wave.open(sys.argv[1], mode='rb') as wav:
	# The rest of the script assumes mono, 16-bit, uncompressed audio at `fs`.
	# These are explicit checks rather than asserts so that they still run
	# under `python -O`.
	wav_params = wav.getparams()
	if wav_params[:3] != (1, 2, fs):
		raise ValueError(
			'expected 1 channel, 2-byte samples at {} Hz; got {!r}'.format(fs, tuple(wav_params[:3]))
		)
	if wav_params[4] != 'NONE':
		raise ValueError('expected uncompressed audio; got {!r}'.format(wav_params[4]))
	frames = wav_params[3]
	samples_bytes = wav.readframes(frames)

# Convert the WAV int16s into floats scaled to +/-1 full-scale.
# astype() returns a fresh writable array, so the in-place divide is safe.
samples = numpy.frombuffer(samples_bytes, dtype=numpy.int16).astype(numpy.float32)
samples /= 32768
# Run the detector over the whole recording once per DTMF frequency. Each
# magnitude trace is kept in `mag_map` and also dumped to '<freq>.f32' as raw
# float32 values.
mag_map = {}
for f in (697, 770, 852, 941, 1209, 1336, 1477, 1633):
	tone_mag = detect_tone(samples, f, fs, step_size)
	assert(tone_mag.dtype == numpy.float32)
	mag_map[f] = tone_mag
	tone_mag.tofile('{:04d}.f32'.format(f))

# Every trace comes from the same input and step size, so the lengths are
# expected to agree; the first one is used as the number of time steps.
mag_lengths = list(map(len, mag_map.values()))
mag_length = mag_lengths[0]

# Low band: scale the four traces by the largest magnitude seen anywhere in
# the band, then pack them into a [steps][4] array (column order 697, 770,
# 852, 941) and dump it as raw float32.
low_band_max = max(max(mag_map[freq]) for freq in (697, 770, 852, 941))
low_band = numpy.zeros((mag_length, 4), dtype=numpy.float32)
for column, freq in enumerate((697, 770, 852, 941)):
	low_band[:, column] = mag_map[freq] / low_band_max
low_band.tofile('lo_x_4.f32')

# High band: same treatment, column order 1209, 1336, 1477, 1633.
high_band_max = max(max(mag_map[freq]) for freq in (1209, 1336, 1477, 1633))
high_band = numpy.zeros((mag_length, 4), dtype=numpy.float32)
for column, freq in enumerate((1209, 1336, 1477, 1633)):
	high_band[:, column] = mag_map[freq] / high_band_max
high_band.tofile('hi_x_4.f32')

# DTMF keypad lookup, indexed as dtmf_char_map[high_index][low_index].
# Each string lists the characters of one high-band frequency, ordered by
# low-band frequency (697, 770, 852, 941); it is expanded into a tuple of
# single-character tuples.
dtmf_char_map = tuple(
	tuple(column_chars) for column_chars in (
		'147*',	# column 1209
		'2580',	# column 1336
		'369#',	# column 1477
		'ABCD',	# column 1633
	)
)

def detect_digits(samples, detect_threshold=0.25):
	"""Decode DTMF characters from the module-level band magnitude arrays.

	Walks `low_band` and `high_band` one time step at a time. A step decodes
	to a character when the strongest frequency in each band is at or above
	`detect_threshold`; otherwise the step counts as a gap. Runs of identical
	results are merged into segments.

	Parameters:
		samples: Unused. Detection reads the module-level `low_band` and
			`high_band` arrays; the parameter is kept so existing calls
			still work.
		detect_threshold: Minimum normalized magnitude the strongest
			frequency of a band must reach for that band to count.

	Returns:
		A dict with two entries:
		'raw': a string with one character per time step, '_' for gaps.
		'detections': a list of segment dicts in time order, each holding
			'timestamp' (start, in seconds), 'duration' (seconds) and
			'character' (None for a gap). The segment still open when the
			input ends is included. If the very first step already decodes
			a character, a zero-duration gap segment precedes it.
	"""
	raw = []
	detections = []
	dtmf_char_last = None
	dtmf_char_time_start = 0

	for step, (lo, hi) in enumerate(zip(low_band, high_band)):
		time_seconds = step * step_size / fs

		# Strongest frequency in each band, and whether it clears the threshold.
		lo_arg = numpy.argmax(lo)
		hi_arg = numpy.argmax(hi)
		lo_det = lo[lo_arg] >= detect_threshold
		hi_det = hi[hi_arg] >= detect_threshold

		# A character needs one frequency from each band at the same time.
		dtmf_char = dtmf_char_map[hi_arg][lo_arg] if lo_det and hi_det else None

		if dtmf_char != dtmf_char_last:
			# The previous segment ended at this step; record it and start a new one.
			detections.append({
				'timestamp': dtmf_char_time_start,
				'duration': time_seconds - dtmf_char_time_start,
				'character': dtmf_char_last,
			})
			dtmf_char_last = dtmf_char
			dtmf_char_time_start = time_seconds

		raw.append('_' if dtmf_char is None else dtmf_char)

	# The loop only records a segment when the next one begins, so the final
	# segment has to be closed here. It ends one step after the last step seen.
	if raw:
		end_seconds = len(raw) * step_size / fs
		detections.append({
			'timestamp': dtmf_char_time_start,
			'duration': end_seconds - dtmf_char_time_start,
			'character': dtmf_char_last,
		})

	return {
		'raw': ''.join(raw),
		'detections': detections,
	}

def print_detections(detections, minimum_duration=0.0, output_breaks=True):
	"""Print one line per detection segment: start time, duration, character.

	Parameters:
		detections: Iterable of dicts with 'timestamp', 'duration' and
			'character' entries; a 'character' of None marks a gap.
		minimum_duration: Segments shorter than this are not printed.
		output_breaks: When false, gap segments are not printed. Printed
			gaps show a single space in place of the character.
	"""
	for entry in detections:
		# Gaps are only shown when the caller asked for them.
		if not output_breaks and entry['character'] is None:
			continue
		if not (entry['duration'] >= minimum_duration):
			continue

		symbol = entry['character']
		label = ' ' if symbol is None else symbol
		print('{:8.4f} {:8.4f} {:s}'.format(entry['timestamp'], entry['duration'], label))

# Decode the recording and show the per-step character trace ('_' marks
# steps with no detection).
detections_result = detect_digits(samples)
raw = detections_result['raw']
detections = detections_result['detections']
print(raw)

# Gap segments never contribute to the decoded string, so drop them once.
tone_events = [
	(detection['duration'], detection['character'])
	for detection in detections
	if detection['character'] is not None
]

# Print the decoded string for a range of minimum segment durations, to show
# how the duration filter changes the result.
for minimum_duration in numpy.arange(0, 0.03, 0.0025):
	chars = [char for duration, char in tone_events if duration >= minimum_duration]
	chars_str = ''.join(chars)
	print('>={:6.4f} {:s}'.format(minimum_duration, chars_str))
	#!/usr/bin/env python3

	import sys
	import wave

	import numpy
	import scipy.signal

	def detect_tone(x, f, fs, step_size, n=200):
		"""Measure how strongly frequency `f` is present in `x` over a sliding window.

		Each window is correlated against a complex exponential at `f`. This is
		the single-bin DFT magnitude that the Goertzel algorithm produces,
		computed here as a direct dot product rather than with the recursive
		Goertzel filter.

		Parameters:
			x: 1-D sequence of samples (must support len() and slicing).
			f: Frequency to detect, in the same units as `fs`.
			fs: Sampling rate of `x`.
			step_size: Number of samples to advance between successive windows.
			n: Window length in samples. Defaults to 200, the value that used
				to be hard-coded here.

		Returns:
			A float32 array of length `len(x) // step_size`. Element `i` is the
			magnitude for the window starting at sample `i * step_size`. A
			unit-amplitude sinusoid at `f` spanning a whole number of periods
			yields 0.5. Trailing positions where fewer than `n` samples remain
			are left at zero.

		Raises:
			ValueError: If `n` is smaller than 1.
		"""
		if n < 1:
			raise ValueError('window length n must be at least 1')

		# Correlation kernel: a complex exponential at f, scaled by 1/n so the
		# magnitude does not grow with the window length.
		w0 = 2 * numpy.pi * f / fs
		coeffs = numpy.exp(
			numpy.arange(n, dtype=numpy.complex64) * w0 * -1j
		) / n

		# One output value per step; entries never written stay at zero.
		output_len = len(x) // step_size
		result = numpy.zeros((output_len,), dtype=numpy.float32)

		for i in range(output_len):
			start = i * step_size
			chunk = x[start:start + n]
			if len(chunk) != n:
				# The window ran past the end of the input, and so will every
				# later one; their outputs keep the zero they were created with.
				break
			result[i] = numpy.abs(numpy.dot(coeffs, chunk))

		return result

	# Expected WAV file sampling rate.
	fs = 8000

	# Time resolution of our detections. We do a detection at every `step_size` samples.
	step_size = 20

	# Read the WAV file named by the first command-line argument. The context
	# manager closes the file as soon as the raw frames are in memory.
	with wave.open(sys.argv[1], mode='rb') as wav:
		# The rest of the script assumes mono, 16-bit, uncompressed audio at `fs`.
		# These are explicit checks rather than asserts so that they still run
		# under `python -O`.
		wav_params = wav.getparams()
		if wav_params[:3] != (1, 2, fs):
			raise ValueError(
				'expected 1 channel, 2-byte samples at {} Hz; got {!r}'.format(fs, tuple(wav_params[:3]))
			)
		if wav_params[4] != 'NONE':
			raise ValueError('expected uncompressed audio; got {!r}'.format(wav_params[4]))
		frames = wav_params[3]
		samples_bytes = wav.readframes(frames)

	# Convert the WAV int16s into floats scaled to +/-1 full-scale.
	# astype() returns a fresh writable array, so the in-place divide is safe.
	samples = numpy.frombuffer(samples_bytes, dtype=numpy.int16).astype(numpy.float32)
	samples /= 32768

	# Run the detector over the whole recording once per DTMF frequency. Each
	# magnitude trace is kept in `mag_map` and also dumped to '<freq>.f32' as raw
	# float32 values.
	mag_map = {}
	for f in (697, 770, 852, 941, 1209, 1336, 1477, 1633):
		tone_mag = detect_tone(samples, f, fs, step_size)

		assert(tone_mag.dtype == numpy.float32)
		tone_mag.tofile('{:04d}.f32'.format(f))

		mag_map[f] = tone_mag

	# Every trace comes from the same input and step size, so the lengths are
	# expected to agree; the first one is used as the number of time steps.
	mag_lengths = [len(d) for d in mag_map.values()]
	mag_length = mag_lengths[0]

	# Low band: scale the four traces by the largest magnitude seen anywhere in
	# the band, then pack them into a [steps][4] array (column order 697, 770,
	# 852, 941) and dump it as raw float32.
	low_band_max = max([max(mag_map[f]) for f in (697, 770, 852, 941)])
	low_band = numpy.zeros((mag_length, 4), dtype=numpy.float32)
	low_band[:,0] = mag_map[697] / low_band_max
	low_band[:,1] = mag_map[770] / low_band_max
	low_band[:,2] = mag_map[852] / low_band_max
	low_band[:,3] = mag_map[941] / low_band_max
	low_band.tofile('lo_x_4.f32')

	# High band: same treatment, column order 1209, 1336, 1477, 1633.
	high_band_max = max([max(mag_map[f]) for f in (1209, 1336, 1477, 1633)])
	high_band = numpy.zeros((mag_length, 4), dtype=numpy.float32)
	high_band[:,0] = mag_map[1209] / high_band_max
	high_band[:,1] = mag_map[1336] / high_band_max
	high_band[:,2] = mag_map[1477] / high_band_max
	high_band[:,3] = mag_map[1633] / high_band_max
	high_band.tofile('hi_x_4.f32')

	# DTMF keypad lookup, indexed as dtmf_char_map[high_index][low_index].
	# Each string lists the characters of one high-band frequency, ordered by
	# low-band frequency (697, 770, 852, 941); it is expanded into a tuple of
	# single-character tuples.
	dtmf_char_map = tuple(
		tuple(column_chars) for column_chars in (
			'147*',	# column 1209
			'2580',	# column 1336
			'369#',	# column 1477
			'ABCD',	# column 1633
		)
	)

	def detect_digits(samples, detect_threshold=0.25):
		"""Decode DTMF characters from the module-level band magnitude arrays.

		Walks `low_band` and `high_band` one time step at a time. A step decodes
		to a character when the strongest frequency in each band is at or above
		`detect_threshold`; otherwise the step counts as a gap. Runs of identical
		results are merged into segments.

		Parameters:
			samples: Unused. Detection reads the module-level `low_band` and
				`high_band` arrays; the parameter is kept so existing calls
				still work.
			detect_threshold: Minimum normalized magnitude the strongest
				frequency of a band must reach for that band to count.

		Returns:
			A dict with two entries:
			'raw': a string with one character per time step, '_' for gaps.
			'detections': a list of segment dicts in time order, each holding
				'timestamp' (start, in seconds), 'duration' (seconds) and
				'character' (None for a gap). The segment still open when the
				input ends is included. If the very first step already decodes
				a character, a zero-duration gap segment precedes it.
		"""
		raw = []
		detections = []
		dtmf_char_last = None
		dtmf_char_time_start = 0

		for step, (lo, hi) in enumerate(zip(low_band, high_band)):
			time_seconds = step * step_size / fs

			# Strongest frequency in each band, and whether it clears the threshold.
			lo_arg = numpy.argmax(lo)
			hi_arg = numpy.argmax(hi)
			lo_det = lo[lo_arg] >= detect_threshold
			hi_det = hi[hi_arg] >= detect_threshold

			# A character needs one frequency from each band at the same time.
			dtmf_char = dtmf_char_map[hi_arg][lo_arg] if lo_det and hi_det else None

			if dtmf_char != dtmf_char_last:
				# The previous segment ended at this step; record it and start a new one.
				detections.append({
					'timestamp': dtmf_char_time_start,
					'duration': time_seconds - dtmf_char_time_start,
					'character': dtmf_char_last,
				})
				dtmf_char_last = dtmf_char
				dtmf_char_time_start = time_seconds

			raw.append('_' if dtmf_char is None else dtmf_char)

		# The loop only records a segment when the next one begins, so the final
		# segment has to be closed here. It ends one step after the last step seen.
		if raw:
			end_seconds = len(raw) * step_size / fs
			detections.append({
				'timestamp': dtmf_char_time_start,
				'duration': end_seconds - dtmf_char_time_start,
				'character': dtmf_char_last,
			})

		return {
			'raw': ''.join(raw),
			'detections': detections,
		}

	def print_detections(detections, minimum_duration=0.0, output_breaks=True):
		"""Print one line per detection segment: start time, duration, character.

		Parameters:
			detections: Iterable of dicts with 'timestamp', 'duration' and
				'character' entries; a 'character' of None marks a gap.
			minimum_duration: Segments shorter than this are not printed.
			output_breaks: When false, gap segments are not printed. Printed
				gaps show a single space in place of the character.
		"""
		for detection in detections:
			if output_breaks or (detection['character'] is not None):
				if detection['duration'] >= minimum_duration:
					dtmf_char_str = detection['character'] if detection['character'] is not None else ' '
					print('{:8.4f} {:8.4f} {:s}'.format(detection['timestamp'], detection['duration'], dtmf_char_str))

	# Decode the recording and show the per-step character trace ('_' marks
	# steps with no detection).
	detections_result = detect_digits(samples)

	raw = detections_result['raw']
	print(raw)

	detections = detections_result['detections']

	# Print the decoded string for a range of minimum segment durations, to show
	# how the duration filter changes the result. Gap segments are skipped.
	for minimum_duration in numpy.arange(0, 0.03, 0.0025):
		chars = [detection['character'] for detection in detections if detection['duration'] >= minimum_duration and detection['character'] is not None]
		chars_str = ''.join(chars)
		print('>={:6.4f} {:s}'.format(minimum_duration, chars_str))