Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@kotarou3
Created October 11, 2014 15:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kotarou3/12d9a7db32a141489b73 to your computer and use it in GitHub Desktop.
Save kotarou3/12d9a7db32a141489b73 to your computer and use it in GitHub Desktop.
Cleans small snippets of speech, and attempts to equate loudness
#!/usr/bin/nodejs --harmony
"use strict";
require("prfun");
// Reads a mono, little-endian, signed WAV file and decodes it to floats.
//
// filename: path of the WAV file to read.
// Returns a Promise resolving to {buffer: Float32Array, sampleRate: number}.
// 16 bit samples are scaled by 1 / 0x8000; 32 bit samples are read as floats.
// Rejects with a string for unsupported formats, and with the stream's error
// for I/O or parse failures.
// NOTE(review): 32 bit data is always decoded as IEEE float; integer 32 bit
// PCM would be misread - confirm whether the reader exposes the audio format.
function readAudio(filename) {
    return new Promise(function (resolve, reject) {
        let reader = new (require("wav").Reader)();
        // Collect the chunks and join them once at the end, rather than
        // re-copying the whole accumulated buffer on every "data" event.
        let chunks = [];
        let format = null;
        let isSupported = false;
        require("fs").createReadStream(filename).on("error", reject).pipe(reader).on("format", function (f) {
            format = f;
            isSupported = f.endianness === "LE" && f.channels === 1 && (f.bitDepth === 16 || f.bitDepth === 32) && Boolean(f.signed);
            if (!isSupported)
                reject("Don't know what to do with format: " + JSON.stringify(f));
        }).on("data", function (data) {
            chunks.push(data);
        }).on("end", function () {
            if (!isSupported) {
                // Either the format was already rejected above (this reject is
                // then a no-op) or the stream ended without any header.
                reject("No usable format found in: " + filename);
                return;
            }
            let rawBuffer = Buffer.concat(chunks);
            let bytesPerSample = format.bitDepth / 8;
            // Only whole samples are decoded; a truncated trailing sample
            // would otherwise make the read run past the end of the buffer.
            let buffer = new Float32Array(Math.floor(rawBuffer.length / bytesPerSample));
            if (format.bitDepth === 16) {
                for (let b = 0; b < buffer.length; ++b)
                    buffer[b] = rawBuffer.readInt16LE(b * 2) / 0x8000;
            } else {
                for (let b = 0; b < buffer.length; ++b)
                    buffer[b] = rawBuffer.readFloatLE(b * 4);
            }
            resolve({buffer: buffer, sampleRate: format.sampleRate});
        }).on("error", reject);
    });
}
// Encodes a float sample buffer as a mono WAV file.
//
// buffer: samples to write.
// sampleRate: sample rate stored in the WAV header.
// filename: destination path.
// isFloat: truthy writes 32 bit float samples (format 3), otherwise 16 bit
//          integer samples (format 1).
// Returns a Promise resolving to filename once the writer emits "done".
// In 16 bit mode, any sample with magnitude above 1 makes the call reject with
// "Clipped by <largest magnitude>" before a file is created.
function writeAudio(buffer, sampleRate, filename, isFloat) {
    let rawBuffer;
    if (!isFloat) {
        rawBuffer = new Buffer(buffer.length * 2);
        let worstOvershoot = 0;
        for (let i = 0; i < buffer.length; ++i) {
            const magnitude = Math.abs(buffer[i]);
            if (magnitude > 1)
                worstOvershoot = Math.max(worstOvershoot, magnitude);
            else
                rawBuffer.writeInt16LE(Math.round(buffer[i] * 0x7fff), i * 2);
        }
        if (worstOvershoot !== 0)
            return Promise.reject("Clipped by " + worstOvershoot);
    } else {
        rawBuffer = new Buffer(buffer.length * 4);
        for (let i = 0; i < buffer.length; ++i)
            rawBuffer.writeFloatLE(buffer[i], i * 4);
    }

    const options = {
        format: isFloat ? 3 : 1,
        channels: 1,
        sampleRate: sampleRate,
        bitDepth: isFloat ? 32 : 16
    };
    const FileWriter = require("wav").FileWriter;
    const writer = new FileWriter(filename, options);
    writer.write(rawBuffer);
    return new Promise(function (resolve, reject) {
        writer.on("done", function () {
            resolve(filename);
        }).on("error", reject);
    });
}
// Calculated from ISO226:2003 at 50 phons
const equalLoudnessContour = {
    freq: [20, 25, 31.5, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300, 8000, 10000, 12500],
    spl: [104.720, 99.145, 93.694, 88.485, 83.963, 79.606, 75.362, 71.609, 68.170, 64.679, 61.721, 59.043, 56.550, 54.265, 52.590, 51.101, 49.983, 50.011, 51.989, 52.875, 49.618, 46.906, 46.050, 47.146, 50.479, 56.112, 61.756, 63.780, 60.135]
};
// Memoised results of equalLoudnessContourAt, keyed by frequency.
let equalLoudnessContourAtCache = {};
// Returns the target power for a tone at the given frequency, derived from the
// equal loudness contour above via cubic spline interpolation.
//
// frequency: frequency to look up, in the same unit as equalLoudnessContour.freq.
// Returns NaN when frequency is NaN or lies outside the tabulated range.
function equalLoudnessContourAt(frequency) {
    let lowest = equalLoudnessContour.freq[0];
    let highest = equalLoudnessContour.freq[equalLoudnessContour.freq.length - 1];
    // Written as a positive range test so that NaN is rejected as well.
    if (!(frequency >= lowest && frequency <= highest))
        return NaN;
    if (frequency in equalLoudnessContourAtCache)
        return equalLoudnessContourAtCache[frequency];
    let spl = require('cubic-spline')(frequency, equalLoudnessContour.freq, equalLoudnessContour.spl);
    // Hacked together formula to convert SPL to power which works reasonably well for speech
    let power = spl / 8 - 17;
    // Remember the result; the cache was previously consulted but never filled.
    equalLoudnessContourAtCache[frequency] = power;
    return power;
}
// Converts an RMS amplitude to a logarithmic power value: 10 * log10(rms).
// An rms of 0 gives -Infinity and a negative rms gives NaN.
function rmsToPower(rms) {
    const scaledNaturalLog = 10 * Math.log(rms);
    return scaledNaturalLog / Math.log(10);
}
// Inverse of rmsToPower: converts a logarithmic power value back to an RMS
// amplitude, i.e. 10 raised to (power / 10).
function powerToRms(power) {
    const exponent = power / 10;
    return Math.pow(10, exponent);
}
// Subtracts the mean sample value from every sample, in place.
//
// buffer: samples to centre around zero; it is modified.
// Returns the same buffer object.
function removeDcOffset(buffer) {
    const total = Array.prototype.reduce.call(buffer, function (sum, sample) {
        return sum + sample;
    }, 0);
    const offset = total / buffer.length;
    for (let i = buffer.length - 1; i >= 0; --i)
        buffer[i] -= offset;
    return buffer;
}
// Computes the one-sided spectrum of a real signal.
//
// buffer: samples to transform; zero padded to the next power of two when its
//         length is not one already.
// Returns {real, imag}, each holding the first (N / 2 + 1) bins of the
// transform, with every bin divided by (bin count - 1).
function fft(buffer) {
    const paddedLength = Math.pow(2, Math.ceil(Math.log(buffer.length) / Math.log(2)));
    let input = buffer;
    if (paddedLength !== buffer.length) {
        input = new Float32Array(paddedLength);
        input.set(buffer);
    }

    const FFT = require("digitalsignals").FFT;
    const transform = new FFT(input.length, 1);
    transform.forward(input);

    const real = transform.real.subarray(0, transform.real.length / 2 + 1);
    const imag = transform.imag.subarray(0, transform.imag.length / 2 + 1);
    const realScale = real.length - 1;
    const imagScale = imag.length - 1;
    for (let bin = 0; bin < real.length; ++bin) {
        real[bin] /= realScale;
        imag[bin] /= imagScale;
    }
    return {real: real, imag: imag};
}
// Inverse of fft(): rebuilds a real signal from a one-sided spectrum.
//
// real, imag: one-sided spectrum as returned by fft(), i.e. (N / 2 + 1) bins
//             scaled down by N / 2.
// length: number of samples to return; the transform size is length rounded
//         up to a power of two, which is expected to equal N.
// Returns a Float32Array holding the first `length` samples of the inverse.
function ifft(real, imag, length) {
    let rawReal = new Float32Array((real.length - 1) * 2);
    let rawImag = new Float32Array((imag.length - 1) * 2);
    rawReal.set(real);
    rawImag.set(imag);
    // fft() scales every bin, including DC, so the scaling has to be undone
    // for bin 0 as well. The loop below starts at 1 only because bin 0 has no
    // mirrored counterpart.
    rawReal[0] *= real.length - 1;
    rawImag[0] *= imag.length - 1;
    for (let f = 1; f < real.length; ++f) {
        rawReal[f] *= real.length - 1;
        rawImag[f] *= imag.length - 1;
        // Fill in the upper half as the complex conjugate of the lower half,
        // as required for a real valued time signal.
        rawReal[rawReal.length - f] = rawReal[f];
        rawImag[rawImag.length - f] = -rawImag[f];
    }
    let transform = new (require("digitalsignals").FFT)(Math.pow(2, Math.ceil(Math.log(length) / Math.log(2))), 1);
    let rawBuffer = transform.inverse(rawReal, rawImag);
    return rawBuffer.subarray(0, length);
}
// Runs a callback over overlapping, windowed frames of buffer and rebuilds the
// signal by overlap-add.
//
// buffer: samples to process; overwritten in place when any frame is changed.
// length: frame size in samples; must be a multiple of 4 because consecutive
//         frames start length / 4 samples apart.
// callback: called as callback(spectrum, rms, middleSample) for each frame.
//           rms is measured before windowing, and middleSample is the frame
//           centre as an index into buffer (it can be negative or past the end
//           because of the padding). Return a {real, imag} spectrum to replace
//           the frame, or a falsy value to keep the windowed frame as it is.
// isRmsOnly: when truthy the FFT is skipped and the callback receives an empty
//            Float32Array instead of a spectrum.
// Returns buffer. Throws an Error if length is not a multiple of 4.
function doSpectrumFiltering(buffer, length, callback, isRmsOnly) {
if (length % 4 !== 0)
throw new Error("length is not a multiple of 4.");
// Cosine-sum window; the coefficients match the Blackman-Nuttall window.
const windowA = [0.3635819, -0.4891775, 0.1365995, -0.0106411];
let window = new Float32Array(length);
for (let w = 0; w < window.length; ++w)
for (let a = 0; a < windowA.length; ++a) {
window[w] += windowA[a] * Math.cos(2 * a * Math.PI * w / length);
}
// Working copy with one frame of zero padding in front of the signal and at
// least one frame of zeros after it.
let inBuffer = new Float32Array(Math.ceil(buffer.length / length + 2) * length);
let outBuffer = new Float32Array(inBuffer.length);
inBuffer.set(buffer, length);
let startSample = 0;
let endSample = length;
let isChanged = false;
let rawBuffer = new Float32Array(length);
while (endSample < inBuffer.length) {
rawBuffer.set(inBuffer.subarray(startSample, endSample));
// RMS of the frame before the window is applied.
let rms = 0;
for (let b = 0; b < rawBuffer.length; ++b)
rms += rawBuffer[b] * rawBuffer[b];
rms = Math.sqrt(rms / rawBuffer.length);
for (let w = 0; w < window.length; ++w)
rawBuffer[w] *= window[w];
let spectrum = isRmsOnly ? new Float32Array(0) : fft(rawBuffer);
// startSample counts from the start of the padded copy, so subtracting half a
// frame (rather than adding it) gives the frame centre relative to buffer.
spectrum = callback(spectrum, rms, startSample - length / 2);
let tmpOutBuffer = spectrum ? ifft(spectrum.real, spectrum.imag, rawBuffer.length) : rawBuffer;
if (spectrum)
isChanged = true;
// Overlap-add the (possibly modified) frame into the output.
for (let b = 0; b < tmpOutBuffer.length; ++b)
outBuffer[b + startSample] += tmpOutBuffer[b];
startSample += length / 4;
endSample += length / 4;
}
// No callback returned a spectrum, so leave the caller's samples untouched.
if (!isChanged)
return buffer;
buffer.set(outBuffer.subarray(length, buffer.length + length));
// Presumably compensates for the summed gain of the four overlapping windows.
for (let b = 0; b < buffer.length; ++b)
buffer[b] /= 4 * windowA[0];
return buffer;
}
// Collects the per-frame analysis produced by doSpectrumFiltering without
// modifying the signal.
//
// buffer: samples to analyse.
// length: frame size passed through to doSpectrumFiltering.
// isRmsOnly: passed through to doSpectrumFiltering.
// Returns {sample, spectrum, rms}: three parallel arrays holding, per frame,
// the middle sample index, the spectrum handed to the callback and the RMS.
function getOverlappingSpectrumAndRms(buffer, length, isRmsOnly) {
    const collected = {sample: [], spectrum: [], rms: []};
    // The collector returns nothing, so no frame is replaced.
    doSpectrumFiltering(buffer, length, function collect(spectrum, rms, middleSample) {
        collected.sample.push(middleSample);
        collected.spectrum.push(spectrum);
        collected.rms.push(rms);
    }, isRmsOnly);
    return collected;
}
// Averages the magnitudes of several spectrums bin by bin.
//
// spectrums: non-empty array of {real, imag} spectrums; the first entry
//            determines the number of bins in the result.
// Returns a Float32Array of mean magnitudes.
function averageSpectrums(spectrums) {
    const count = spectrums.length;
    const mean = new Float32Array(spectrums[0].real.length);
    spectrums.forEach(function (entry) {
        const magnitudes = realiseSpectrum(entry);
        for (let bin = 0; bin < mean.length; ++bin)
            mean[bin] += magnitudes[bin] / count;
    });
    return mean;
}
// Maps a frequency to the nearest bin of a one-sided spectrum.
//
// frequency: frequency to locate, in the same unit as sampleRate.
// length: number of bins in the spectrum; the last bin sits at sampleRate / 2.
// sampleRate: sample rate of the signal the spectrum was computed from.
// Returns the rounded bin index; it is not clamped to the spectrum.
function frequencyToIndex(frequency, length, sampleRate) {
    const lastIndex = length - 1;
    const position = 2 * frequency * lastIndex / sampleRate;
    return Math.round(position);
}
// Maps a bin of a one-sided spectrum back to its frequency; the inverse of
// frequencyToIndex apart from rounding.
//
// index: bin index.
// length: number of bins in the spectrum; the last bin sits at sampleRate / 2.
// sampleRate: sample rate of the signal the spectrum was computed from.
function indexToFrequency(index, length, sampleRate) {
    const lastIndex = length - 1;
    return index * sampleRate / (2 * lastIndex);
}
// Reduces a complex spectrum to its magnitudes.
//
// spectrum: {real, imag} with one entry per bin.
// Returns a new Float32Array holding sqrt(real^2 + imag^2) for every bin.
function realiseSpectrum(spectrum) {
    const re = spectrum.real;
    const im = spectrum.imag;
    const magnitudes = new Float32Array(re.length);
    for (let bin = magnitudes.length - 1; bin >= 0; --bin) {
        const sumOfSquares = re[bin] * re[bin] + im[bin] * im[bin];
        magnitudes[bin] = Math.sqrt(sumOfSquares);
    }
    return magnitudes;
}
// Finds the strict local maxima of an array.
//
// array: values to scan; the first and last entries are never peaks.
// Returns an array of {index, value} sorted by value, largest first.
function findPeaks(array) {
    const peaks = [];
    for (let position = 1; position + 1 < array.length; ++position) {
        const candidate = array[position];
        const isAbovePrevious = candidate > array[position - 1];
        const isAboveNext = candidate > array[position + 1];
        if (isAbovePrevious && isAboveNext)
            peaks.push({index: position, value: candidate});
    }
    peaks.sort(function (first, second) {
        return second.value - first.value;
    });
    return peaks;
}
// Zeroes every bin outside a frequency band, in place.
//
// startFrequency: bins below this frequency's bin are zeroed.
// endFrequency: this frequency's bin and everything above it are zeroed.
// spectrum: {real, imag} spectrum to modify.
// sampleRate: sample rate used to convert the frequencies to bins.
// Returns the same spectrum object.
function bandPass(startFrequency, endFrequency, spectrum, sampleRate) {
    const binCount = spectrum.real.length;
    // The lower edge is capped at the bin count so an infinite start frequency
    // clears the whole spectrum instead of looping forever.
    const firstKept = Math.min(frequencyToIndex(startFrequency, binCount, sampleRate), binCount);
    const firstDropped = frequencyToIndex(endFrequency, binCount, sampleRate);
    function silence(bin) {
        spectrum.real[bin] = spectrum.imag[bin] = 0;
    }
    for (let bin = 0; bin < firstKept; ++bin)
        silence(bin);
    for (let bin = firstDropped; bin < binCount; ++bin)
        silence(bin);
    return spectrum;
}
// Zeroes the bin at cutoffFrequency and every bin above it, in place, by
// running bandPass with a lower edge of 0. Returns the same spectrum object.
function lowPass(cutoffFrequency, spectrum, sampleRate) {
    const lowerEdge = 0;
    return bandPass(lowerEdge, cutoffFrequency, spectrum, sampleRate);
}
// Zeroes every bin below the bin at cutoffFrequency, in place, by running
// bandPass with an infinite upper edge. Returns the same spectrum object.
function highPass(cutoffFrequency, spectrum, sampleRate) {
    const upperEdge = Infinity;
    return bandPass(cutoffFrequency, upperEdge, spectrum, sampleRate);
}
// Computes the population standard deviation of a list of numbers, the same
// definition getMeanAndSdWithoutOutliers uses.
//
// values: array (or typed array) of numbers.
// Returns the standard deviation, or NaN when values is empty.
function calculateStandardDeviation(values) {
    let mean = 0;
    for (let e = 0; e < values.length; ++e)
        mean += values[e];
    mean /= values.length;
    // Accumulate squared deviations from the mean. The previous code added the
    // values to `mean` a second time here and never returned anything.
    let squaredDeviations = 0;
    for (let e = 0; e < values.length; ++e) {
        let deviation = values[e] - mean;
        squaredDeviations += deviation * deviation;
    }
    return Math.sqrt(squaredDeviations / values.length);
}
// Computes mean and population standard deviation after repeatedly discarding
// extreme values.
//
// values: array of numbers; NaN and infinite entries are ignored.
// cutoffZ: a value is discarded while it lies more than cutoffZ standard
//          deviations from the current mean. The largest remaining value is
//          checked first, then the smallest, and the statistics are
//          recomputed after every removal.
// Returns {values, mean, sd, sum, squaredSum, valuesStart, valuesEnd, elements}
// where values is the filtered list sorted in descending order and
// [valuesStart, valuesEnd) is the range of it that survived. sd is forced to 0
// when it cannot be computed (for example when nothing is left).
function getMeanAndSdWithoutOutliers(values, cutoffZ) {
    const sorted = values.filter(function (candidate) {
        return candidate > -Infinity && candidate < Infinity;
    }).sort(function (left, right) {
        return right - left;
    });

    let total = 0;
    let totalOfSquares = 0;
    sorted.forEach(function (value) {
        total += value;
        totalOfSquares += value * value;
    });

    let first = 0;
    let pastLast = sorted.length;
    let count = sorted.length;
    let mean;
    let sd;
    for (;;) {
        mean = total / count;
        sd = Math.sqrt(count * totalOfSquares - total * total) / count;
        if (isNaN(sd)) {
            sd = 0;
            break;
        }
        const limit = cutoffZ * sd;
        let discarded;
        if (sorted[first] - mean > limit)
            discarded = sorted[first++];
        else if (mean - sorted[pastLast - 1] > limit)
            discarded = sorted[--pastLast];
        else
            break;
        // Update the running sums instead of recomputing them from scratch.
        total -= discarded;
        totalOfSquares -= discarded * discarded;
        --count;
    }
    return {
        values: sorted,
        mean: mean,
        sd: sd,
        sum: total,
        squaredSum: totalOfSquares,
        valuesStart: first,
        valuesEnd: pastLast,
        elements: count
    };
}
// Reduces steady background noise by spectral subtraction, in place.
//
// A per-bin noise level is estimated from the whole signal (mean plus one
// standard deviation of the bin's power over time, outliers beyond 2 standard
// deviations excluded) and subtracted from the magnitude of every bin of every
// frame; bins that fall to or below zero are silenced. Phase is preserved.
//
// buffer: samples to clean; modified in place. Nothing is returned.
function removeNoise(buffer) {
    const frameLength = 4096;

    // Analyse a copy with a quarter of the signal's length of zeros in front
    // and behind.
    const padded = new Float32Array(Math.floor(1.5 * buffer.length));
    padded.set(buffer, Math.floor(buffer.length / 4));
    const magnitudesByFrame = getOverlappingSpectrumAndRms(padded, frameLength).spectrum.map(realiseSpectrum);

    const binCount = magnitudesByFrame[0].length;
    const noiseLevels = [];
    for (let bin = 0; bin < binCount; ++bin) {
        // Power of this bin in every frame, i.e. one column of the
        // time-by-frequency magnitude table.
        const powersOverTime = magnitudesByFrame.map(function (frame) {
            return rmsToPower(frame[bin]);
        });
        const stats = getMeanAndSdWithoutOutliers(powersOverTime, 2);
        noiseLevels.push(powerToRms(stats.mean + stats.sd));
    }

    doSpectrumFiltering(buffer, frameLength, function subtractNoise(spectrum) {
        const re = spectrum.real;
        const im = spectrum.imag;
        for (let bin = 0; bin < re.length; ++bin) {
            const phase = Math.atan2(im[bin], re[bin]);
            const magnitude = Math.sqrt(re[bin] * re[bin] + im[bin] * im[bin]) - noiseLevels[bin];
            if (magnitude <= 0) {
                re[bin] = im[bin] = 0;
                continue;
            }
            re[bin] = magnitude * Math.cos(phase);
            im[bin] = magnitude * Math.sin(phase);
        }
        return spectrum;
    });
}
// Estimates the dominant pitch of a signal.
//
// Each bin's magnitude is multiplied by the magnitudes at 2, 3 and 4 times its
// index (treated as 0 beyond the end of the spectrum), and the frequency of
// the largest peak of that product is returned.
//
// buffer: samples to analyse.
// sampleRate: sample rate of buffer.
// Throws a TypeError when the product has no peak at all.
function detectPitch(buffer, sampleRate) {
    const magnitudes = realiseSpectrum(fft(buffer));
    const product = new Float32Array(magnitudes);
    for (let bin = 0; bin < magnitudes.length; ++bin)
        for (let harmonic = 2; harmonic < 5; ++harmonic)
            product[bin] *= magnitudes[bin * harmonic] || 0;
    const strongest = findPeaks(product)[0];
    return indexToFrequency(strongest.index, product.length, sampleRate);
}
// Strips exactly-zero samples from both ends of a buffer.
//
// buffer: typed array of samples.
// Returns a subarray view (no copy) that keeps up to two of the leading zero
// samples and one trailing zero sample around the non-zero content. An
// all-zero buffer yields a view of at most a few samples.
function trimSilence(buffer) {
    let firstLoud = 0;
    while (buffer[firstLoud] === 0)
        ++firstLoud;
    let lastLoud = buffer.length - 1;
    while (buffer[lastLoud] === 0)
        --lastLoud;
    const from = Math.max(firstLoud - 2, 0);
    const to = Math.min(lastLoud + 2, buffer.length);
    return buffer.subarray(from, to);
}
// Lists the runs of exactly-zero samples longer than a threshold.
//
// buffer: samples to scan.
// minimumSamples: a run is reported only when it is longer than this.
// Returns an array of {start, end}, where start is the first zero sample of
// the run and end is the index of the non-zero sample that terminates it.
// A run that reaches the end of the buffer is never reported, because runs
// are only closed by a following non-zero sample.
function findSilences(buffer, minimumSamples) {
    const silences = [];
    let runStart = 0;
    for (let position = 0; position < buffer.length; ++position) {
        if (buffer[position] === 0)
            continue;
        if (position - runStart > minimumSamples)
            silences.push({start: runStart, end: position});
        runStart = position + 1;
    }
    return silences;
}
// Command line driver: process.argv[2] is prepended to every output file name
// and each later argument is an input WAV file.
// Promise.guard comes from prfun; presumably it limits readAudio to one call
// in flight at a time - confirm against the prfun documentation.
let guardedReadAudio = Promise.guard(1, readAudio);
for (let a = 3; a < process.argv.length; ++a) {
    let filename = process.argv[a];
    guardedReadAudio(filename).then(function (data) {
        let buffer = removeDcOffset(data.buffer);
        let sampleRate = data.sampleRate;
        removeNoise(buffer);
        // Drop everything below 120 Hz.
        doSpectrumFiltering(buffer, 4096, function (spectrum) {
            highPass(120, spectrum, sampleRate);
            return spectrum;
        });
        let rmsPeaks = findPeaks(getOverlappingSpectrumAndRms(buffer, 2048, true).rms);
        // Without a peak there is no reference level to gate or scale against.
        if (rmsPeaks.length === 0)
            return Promise.reject("no RMS peak found");
        let peakRms = rmsPeaks[0].value;
        // Gate: silence every frame whose power is more than 17 below the peak.
        doSpectrumFiltering(buffer, 2048, function (spectrum, rms) {
            if (rmsToPower(peakRms) - rmsToPower(rms) > 17) {
                // An infinite cutoff zeroes the whole spectrum.
                highPass(Infinity, spectrum, 1);
                return spectrum;
            }
        });
        buffer = trimSilence(buffer);
        if (buffer.length <= 2)
            return Promise.reject("buffer empty");
        let loudestFreq = detectPitch(buffer, sampleRate);
        // Scale so that the loudest frame reaches the target power for the
        // detected pitch.
        let ratio = powerToRms(equalLoudnessContourAt(loudestFreq)) / peakRms;
        // equalLoudnessContourAt returns NaN outside its table; multiplying by
        // that would turn every sample into NaN.
        if (!Number.isFinite(ratio))
            return Promise.reject("cannot equalise loudness for detected pitch of " + loudestFreq + " Hz");
        for (let b = 0; b < buffer.length; ++b)
            buffer[b] *= ratio;
        let silences = findSilences(buffer, 0.1 * sampleRate);
        if (silences.length > 0)
            console.warn(filename, "has silence at:", silences.map(function (silence) { return (silence.end - silence.start) / sampleRate; }).join(", "));
        //if (silences.length !== 2) // keep between
        //    return Promise.reject(silences);
        //else
        //    buffer = trimSilence(buffer.subarray(silences[0].start, silences[1].end));
        //buffer = trimSilence(buffer.subarray(0, silences[0].end)); // remove end
        //buffer = trimSilence(buffer.subarray(silences[silences.length - 1].start)); // remove start
        return writeAudio(buffer, sampleRate, process.argv[2] + Math.round(loudestFreq) + "-" + filename);
    }).catch(function (e) {
        // Report the failure for this file and carry on with the others.
        console.warn(filename, e.stack || e);
    }).done();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment