# moui72/timings.py

## timings.py
from os import walk
from os.path import join, basename
from pydub import AudioSegment
import pydub.scipy_effects
import numpy
import scipy.signal as sg
import csv


def count_silent_chunks(chunks, threshold, rev=False):
    """
    count the number of chunks that are silent from start or end to first noise and return it as an integer

    A chunk is silent when its dBFS is -inf or lies below threshold. Counting
    stops at the first chunk that is not silent.

    Keyword arguments:
    chunks      --  a list of chunks of a sound file (each exposing .dBFS)
    threshold   --  dbFS value to compare against for detection of silence
    rev         --  which edge to start from (True = right edge)
    """
    # walk the chunks themselves instead of an index range; this also avoids
    # xrange, which does not exist on Python 3
    ordered = reversed(chunks) if rev else chunks

    silent_blocks = 0
    for chunk in ordered:
        level = chunk.dBFS  # read once per chunk
        if level == float('-inf') or level < threshold:
            silent_blocks += 1
        else:
            break
    return silent_blocks


def get_silence(audio, interval, threshold, step):
    """get length of silence at edges in ms from a wav file and return
    {start: [ms, blocks], end: [ms, blocks], duration: ms, threshold: dbFS}

    The last 250 ms of the file are cut off, the rest is band-pass filtered
    and split into chunks of interval ms. Starting at threshold, the dbFS
    threshold is raised by step until more than the minimum number of leading
    chunks are silent; the trailing edge is counted with that same threshold.

    In the result an ms value of -1 means the block count was not strictly
    between 0 and the plausible maximum. The returned threshold is 0 when
    the search ended without selecting a threshold and 1 when it was given up
    because the leading silence exceeded the plausible maximum.

    Keyword arguments:
    audio       --  filename
    interval    --  size of chunks in ms (an integer, it is used as range step)
    threshold   --  start dbFS threshold for silence comparison
    step        --  amount to increment by when searching for silence threshold
    """

    # chop off this number of ms from end of file (mouse click)
    end_omit = 250
    wav = AudioSegment.from_wav(audio)[:-end_omit]

    # band-pass filter out low and high frequencies
    # -- low freq cut off, high freq cutoff, order (6db * order)
    wav = wav.band_pass_filter(100, 3000, 4)

    # from https://github.com/jiaaro/pydub/blob/master/pydub/scipy_effects.py
    # order: nth order butterworth filter(default: 5th order). The
    # attenuation is -6dB/octave beyond the cutoff frequency(for 1st
    # order). A Higher order filter will have more attenuation, each level
    # adding an additional - 6dB(so a 3rd order butterworth filter would
    # be - 18dB/octave).

    # break into chunks of interval ms
    chunks = [wav[i:i + interval]
              for i in range(0, len(wav), interval)]

    # min/max chunks of silence to guard against implausible results;
    # floor division keeps the block counts integral on Python 3 as well
    min_silence = 250 // interval
    max_silence = len(chunks) - min_silence - 1

    # find number of chunks with dBFS below threshold at start
    silent_blocks_start = 0
    selected_threshold = 0  # stays 0 if no threshold gets selected
    for i in numpy.arange(threshold, 0, step):
        if silent_blocks_start > max_silence:
            # the previous pass counted more silence than is plausible
            silent_blocks_start = -1
            selected_threshold = 1
            break
        silent_blocks_start = count_silent_chunks(chunks, i)
        if silent_blocks_start > min_silence:
            selected_threshold = i
            break

    # find number of chunks with dBFS below threshold at end, but only when
    # the search above selected a (negative) threshold
    if selected_threshold < 0:
        silent_blocks_end = count_silent_chunks(
            chunks, selected_threshold, True)
    else:
        silent_blocks_end = -1

    if silent_blocks_end < min_silence + 1:
        silent_blocks_end = -1

    # convert block counts to ms, measured to the middle of the last silent
    # block; the trailing value adds back the part that was chopped off
    end_ms_silence = -1
    start_ms_silence = -1
    if 0 < silent_blocks_start < max_silence:
        start_ms_silence = silent_blocks_start * interval - interval // 2
    if 0 < silent_blocks_end < max_silence:
        end_ms_silence = silent_blocks_end * interval - \
            interval // 2 + end_omit

    return {
        "start": [start_ms_silence, silent_blocks_start],
        "end": [end_ms_silence, silent_blocks_end],
        # counted in whole chunks of the trimmed audio
        "duration": len(chunks) * interval,
        "threshold": selected_threshold,
    }


def print_item(name, vals):
    """ print values for a file as one right-aligned row

    Keyword arguments:
    name    --  path of the sound file, only its base name is printed
    vals    --  dict with "start" and "end" ([ms, blocks] each), "duration"
                and "threshold"
    """
    row = '{:>16} \t {:>8} {:>4} \t {:>8} {:>4} \t {:>8} \t {:-03.3f}'.format(
        basename(name), vals["start"][0], vals["start"][1],
        vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"])
    # print with one parenthesised argument behaves the same on Python 2 and 3
    print(row)

def write_csv(data, filename):
    """ write data to a csv

    Keyword arguments:
    data        --  iterable of (name, vals) pairs; vals is a dict with
                    "start" and "end" ([ms, blocks] each), "duration" and
                    "threshold"
    filename    --  path of the csv file to create (overwritten if present)
    """
    import sys

    # the csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] < 3:
        csvfile = open(str(filename), 'wb')
    else:
        csvfile = open(str(filename), 'w', newline='')

    with csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(["file", "leading_silence_ms", "leading_silence_blocks",
                         "trailing_silence_ms", "trailing_silence_blocks", "duration", "dbFS_threshold"])
        for (name, vals) in data:
            writer.writerow(
                [basename(name), vals["start"][0], vals["start"][1],
                 vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"]])


# get files in all directories in a directory
audio_path = "/Users/tyler/Downloads/recordings"
# sub-directories that are not searched
excluded_dirs = (
    "prac",  # practice items
    "108",   # this is a glitch
    "0",     # this is test data
    "1000",  # this is test data
)
audio_files = []
for root, dirs, files in walk(audio_path):
    for name in files:
        if "wav" in name and "E" in name:
            # only sound files of Experimental items (E for EXP)
            audio_files.append(join(root, name))
    # prune in place so that walk() does not descend into excluded directories
    dirs[:] = [d for d in dirs if d not in excluded_dirs]

threshold = -60   # starting dbFS value for detecting silence
step = 0.01  # dbFS increment during search
interval = 50  # ms, increase to speed up

# iterate over all files and find silences; the collected names already
# start with audio_path, so they are passed on unchanged
edge_silences = {a: get_silence(a, interval, threshold, step)
                 for a in audio_files}

# sort once so that console output and csv list the files in the same order
results = sorted(edge_silences.items())

# output result and count files with bad values
fails = 0
for name, vals in results:
    if vals["start"][0] < 0 or vals["end"][0] < 0:
        # no value found for start and/or end of file
        fails += 1
    print_item(name, vals)
# built as one string so the line prints the same on Python 2 and 3
print('\n' + str(fails) + " failures out of " + str(len(edge_silences)))

# write results to file
write_csv(results, "output.csv")
	from os import walk
	from os.path import join, basename
	from pydub import AudioSegment
	import pydub.scipy_effects
	import numpy
	import scipy.signal as sg
	import csv


	def count_silent_chunks(chunks, threshold, rev=False):
		"""
		count the number of chunks that are silent from start or end to first noise and return it as an integer

		A chunk is silent when its dBFS is -inf or lies below threshold. Counting
		stops at the first chunk that is not silent.

		Keyword arguments:
		chunks -- a list of chunks of a sound file (each exposing .dBFS)
		threshold -- dbFS value to compare against for detection of silence
		rev -- which edge to start from (True = right edge)
		"""
		# walk the chunks themselves instead of an index range; this also avoids
		# xrange, which does not exist on Python 3
		ordered = reversed(chunks) if rev else chunks

		silent_blocks = 0
		for chunk in ordered:
			level = chunk.dBFS  # read once per chunk
			if level == float('-inf') or level < threshold:
				silent_blocks += 1
			else:
				break
		return silent_blocks


	def get_silence(audio, interval, threshold, step):
		"""get length of silence at edges in ms from a wav file and return
		{start: [ms, blocks], end: [ms, blocks], duration: ms, threshold: dbFS}

		The last 250 ms of the file are cut off, the rest is band-pass filtered
		and split into chunks of interval ms. Starting at threshold, the dbFS
		threshold is raised by step until more than the minimum number of leading
		chunks are silent; the trailing edge is counted with that same threshold.

		In the result an ms value of -1 means the block count was not strictly
		between 0 and the plausible maximum. The returned threshold is 0 when
		the search ended without selecting a threshold and 1 when it was given up
		because the leading silence exceeded the plausible maximum.

		Keyword arguments:
		audio -- filename
		interval -- size of chunks in ms (an integer, it is used as range step)
		threshold -- start dbFS threshold for silence comparison
		step -- amount to increment by when searching for silence threshold
		"""

		# chop off this number of ms from end of file (mouse click)
		end_omit = 250
		wav = AudioSegment.from_wav(audio)[:-end_omit]

		# band-pass filter out low and high frequencies
		# -- low freq cut off, high freq cutoff, order (6db * order)
		wav = wav.band_pass_filter(100, 3000, 4)

		# from https://github.com/jiaaro/pydub/blob/master/pydub/scipy_effects.py
		# order: nth order butterworth filter(default: 5th order). The
		# attenuation is -6dB/octave beyond the cutoff frequency(for 1st
		# order). A Higher order filter will have more attenuation, each level
		# adding an additional - 6dB(so a 3rd order butterworth filter would
		# be - 18dB/octave).

		# break into chunks of interval ms
		chunks = [wav[i:i + interval]
			for i in range(0, len(wav), interval)]

		# min/max chunks of silence to guard against implausible results;
		# floor division keeps the block counts integral on Python 3 as well
		min_silence = 250 // interval
		max_silence = len(chunks) - min_silence - 1

		# find number of chunks with dBFS below threshold at start
		silent_blocks_start = 0
		selected_threshold = 0  # stays 0 if no threshold gets selected
		for i in numpy.arange(threshold, 0, step):
			if silent_blocks_start > max_silence:
				# the previous pass counted more silence than is plausible
				silent_blocks_start = -1
				selected_threshold = 1
				break
			silent_blocks_start = count_silent_chunks(chunks, i)
			if silent_blocks_start > min_silence:
				selected_threshold = i
				break

		# find number of chunks with dBFS below threshold at end, but only when
		# the search above selected a (negative) threshold
		if selected_threshold < 0:
			silent_blocks_end = count_silent_chunks(
				chunks, selected_threshold, True)
		else:
			silent_blocks_end = -1

		if silent_blocks_end < min_silence + 1:
			silent_blocks_end = -1

		# convert block counts to ms, measured to the middle of the last silent
		# block; the trailing value adds back the part that was chopped off
		end_ms_silence = -1
		start_ms_silence = -1
		if 0 < silent_blocks_start < max_silence:
			start_ms_silence = silent_blocks_start * interval - interval // 2
		if 0 < silent_blocks_end < max_silence:
			end_ms_silence = silent_blocks_end * interval - \
				interval // 2 + end_omit

		return {
			"start": [start_ms_silence, silent_blocks_start],
			"end": [end_ms_silence, silent_blocks_end],
			# counted in whole chunks of the trimmed audio
			"duration": len(chunks) * interval,
			"threshold": selected_threshold,
		}


	def print_item(name, vals):
		""" print values for a file as one right-aligned row

		Keyword arguments:
		name -- path of the sound file, only its base name is printed
		vals -- dict with "start" and "end" ([ms, blocks] each), "duration"
			and "threshold"
		"""
		row = '{:>16} \t {:>8} {:>4} \t {:>8} {:>4} \t {:>8} \t {:-03.3f}'.format(
			basename(name), vals["start"][0], vals["start"][1],
			vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"])
		# print with one parenthesised argument behaves the same on Python 2 and 3
		print(row)


	def write_csv(data, filename):
		""" write data to a csv

		Keyword arguments:
		data -- iterable of (name, vals) pairs; vals is a dict with "start" and
			"end" ([ms, blocks] each), "duration" and "threshold"
		filename -- path of the csv file to create (overwritten if present)
		"""
		import sys

		# the csv module wants a binary file on Python 2 but a text file opened
		# with newline='' on Python 3
		if sys.version_info[0] < 3:
			csvfile = open(str(filename), 'wb')
		else:
			csvfile = open(str(filename), 'w', newline='')

		with csvfile:
			writer = csv.writer(csvfile, delimiter=',')
			writer.writerow(["file", "leading_silence_ms", "leading_silence_blocks",
				"trailing_silence_ms", "trailing_silence_blocks", "duration", "dbFS_threshold"])
			for (name, vals) in data:
				writer.writerow(
					[basename(name), vals["start"][0], vals["start"][1],
					vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"]])


	# get files in all directories in a directory
	audio_path = "/Users/tyler/Downloads/recordings"
	# sub-directories that are not searched
	excluded_dirs = (
		"prac",  # practice items
		"108",  # this is a glitch
		"0",  # this is test data
		"1000",  # this is test data
	)
	audio_files = []
	for root, dirs, files in walk(audio_path):
		for name in files:
			if "wav" in name and "E" in name:
				# only sound files of Experimental items (E for EXP)
				audio_files.append(join(root, name))
		# prune in place so that walk() does not descend into excluded directories
		dirs[:] = [d for d in dirs if d not in excluded_dirs]

	threshold = -60 # starting dbFS value for detecting silence
	step = 0.01 # dbFS increment during search
	interval = 50 # ms, increase to speed up

	# iterate over all files and find silences; the collected names already
	# start with audio_path, so they are passed on unchanged
	edge_silences = {a: get_silence(a, interval, threshold, step)
		for a in audio_files}

	# sort once so that console output and csv list the files in the same order
	results = sorted(edge_silences.items())

	# output result and count files with bad values
	fails = 0
	for name, vals in results:
		if vals["start"][0] < 0 or vals["end"][0] < 0:
			# no value found for start and/or end of file
			fails += 1
		print_item(name, vals)
	# built as one string so the line prints the same on Python 2 and 3
	print('\n' + str(fails) + " failures out of " + str(len(edge_silences)))

	# write results to file
	write_csv(results, "output.csv")