Skip to content

Instantly share code, notes, and snippets.

@davidavdav
Created April 25, 2020 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidavdav/a85cde3ae7e0c7bf1b904e0da6deecac to your computer and use it in GitHub Desktop.
Save davidavdav/a85cde3ae7e0c7bf1b904e0da6deecac to your computer and use it in GitHub Desktop.
Some routines for computing energy levels in speech
#!/usr/bin/env julia
## (c) David A. van Leeuwen
## routines for adding noise at the right level
import MFCC
import WAV
import ProgressMeter
import StatsBase
import LinearAlgebra
import DelimitedFiles
import ArgParse
import Random
import JSON
# from energy to dB and vice versa
"""
    undb(x)

Convert a level `x` in decibels to the corresponding energy ratio, `10^(x/10)`.
"""
function undb(x)
    return 10 ^ (x / 10)
end
"""
    db(x)

Convert an energy (ratio) `x` to decibels, `10 log10(x)`.
"""
function db(x)
    return 10 * log10(x)
end
## A-weighting as energy factors, one per octave band from 125 Hz up to 8000 Hz;
## the list below holds the weights in dB, `undb` converts each of them to a factor
const Aweight = map(undb, [-16.0, -9.0, -3.0, 0.0, 1.0, 1.0, -1.0])
"""
    aWeightedEnergy(x::Vector, sr; steptime=0.1)

Compute the A-weighted energy per analysis frame of the signal `x` sampled at `sr` Hz.

The power spectrum is obtained from `MFCC.powspec` with a window of `2steptime` seconds
(or the whole signal, if that is shorter) that advances by half a window.  For each of
the seven octave bands centred on 125 Hz up to 8000 Hz, the spectral rows within half an
octave of the centre frequency are summed per frame, multiplied by the matching entry of
`Aweight`, and accumulated over the bands.  The sum is divided by the number of
frequency rows of the spectrum.

Returns a vector with one energy value per frame.  A signal shorter than 25 ms is not
analysed; `[0.0]` is returned for it.
"""
function aWeightedEnergy(x::Vector, sr; steptime=0.1)
    dur = length(x) / sr
    ## too short to analyse: report a single frame without energy
    if dur < 0.025
        return [0.0]
    end
    wintime = min(dur, 2steptime)
    p = MFCC.powspec(x, sr, wintime=wintime, steptime=0.5wintime)
    nfreq, nframes = size(p)
    ## row i of `p` is taken to sit at frequency (i - 1) * freqstep
    freqstep = sr / 2 / nfreq
    bands = exp10.(range(log10(125), log10(8000), length=7))
    energy = zeros(nframes)
    for (midfreq, aweight) in zip(bands, Aweight)
        ## rows between midfreq / √2 and midfreq * √2, clipped to the rows that exist;
        ## a band entirely above the highest row gives an empty range and adds nothing
        lowi = ceil(Int, midfreq / √2 / freqstep) + 1
        highi = min(floor(Int, midfreq * √2 / freqstep) + 1, nfreq)
        ## view + in-place update: no copy of the band, no new vector per band
        energy .+= vec(sum(view(p, lowi:highi, :), dims=1)) .* aweight
    end
    return energy / nfreq
end
"""
    alevel(x::Vector; sr=16000.0, ref=reflevel, sad=true)

Compute the A-weighted level in dB of the speech samples `x`, in relation to a
reference level `ref`.

By default the reference is a 1000 Hz sine tone at full amplitude (-1.0, 1.0).

# Keywords
- `sr`: sampling rate of `x` in Hz.
- `ref`: level in dB that is subtracted from the result.
- `sad`: if `true`, a speech-level-meter speech activity detection is carried out: only
  frames whose level exceeds the 99th percentile of the frame levels minus 14 dB are
  averaged, effectively not accounting the silent parts of the speech in computing the
  level.  If no frame passes that test (this happens when `aWeightedEnergy` reports
  zero energy, e.g. for input that is too short to analyse), all frames are averaged, so
  the result equals that of `sad=false` instead of being `NaN`.
"""
function alevel(x::Vector; sr=16000.0, ref=reflevel, sad=true)
    energy = aWeightedEnergy(x, sr)
    if sad
        energydb = db.(energy)
        maxlevel = StatsBase.percentile(energydb, 99)
        active = energydb .> maxlevel - 14
        ## averaging an empty selection would give NaN; keep all frames in that case
        if any(active)
            energy = energy[active]
        end
    end
    return db(StatsBase.mean(energy)) - ref
end
## Default reference level for `alevel`: the level, without speech activity detection and
## with a reference of 0 dB, of samples 0:16000 of a full-amplitude sine with a period of
## 16 samples (1000 Hz at the default 16 kHz sampling rate).  Must be defined after
## `alevel(x::Vector)`, because it is computed by calling it.
const reflevel = alevel([sin(2π * n / 16) for n in 0:16000]; ref=0.0, sad=false)
"""
    wavnorm(x::Tuple)

Little helper: reduce a tuple whose first two elements are sample data and a sampling
rate to the pair `(vec(samples), Float64(rate))`; any further elements are ignored.

NOTE(review): `vec` flattens a matrix column after column, so if the samples arrive as a
samples × channels matrix the channels end up one after the other -- confirm that only
single-channel data is passed in.
"""
function wavnorm(x::Tuple)
    samples = x[1]
    rate = x[2]
    return vec(samples), Float64(rate)
end
"""
    alevel(file::AbstractString; kwargs...)

Read `file` with `WAV.wavread` and compute the A-weighted level of its samples.

The sampling rate found in the file is passed on as `sr`; all keyword arguments are
forwarded to the method of `alevel` that operates on a vector of samples.
"""
function alevel(file::AbstractString; kwargs...)
    samples, rate = wavnorm(WAV.wavread(file))
    return alevel(samples; sr=rate, kwargs...)
end
"""
    stats(file::AbstractString)

Read `file` with `WAV.wavread` and return the tuple `(duration, level)`: the number of
samples divided by the sampling rate, and the A-weighted level computed by `alevel` with
its default reference and speech activity detection.
"""
function stats(file::AbstractString)
    samples, rate = wavnorm(WAV.wavread(file))
    level = alevel(samples; sr=rate)
    duration = length(samples) / rate
    return duration, level
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment