Skip to content

Instantly share code, notes, and snippets.

@runeb
Last active September 25, 2022 20:03
Show Gist options
  • Save runeb/ad528c62c2ab4eb41f1ed3189ac39fc1 to your computer and use it in GitHub Desktop.
Save runeb/ad528c62c2ab4eb41f1ed3189ac39fc1 to your computer and use it in GitHub Desktop.
import { assert } from "console"
import { open } from "fs/promises"
// Shared UTF-8 decoder. NOTE: despite its historical name this is a TextDecoder
// (bytes -> string), not an encoder.
const strEncoder = new TextDecoder("utf-8")

/**
 * Decodes `len` bytes of `scanner`, starting at byte offset `cursor`, as UTF-8.
 *
 * @param scanner View over the bytes to decode.
 * @param cursor Byte offset, relative to the start of the view.
 * @param len Number of bytes to decode; zero or less yields an empty string.
 * @returns The decoded string.
 * @throws RangeError if the requested range falls outside the view.
 */
function readString(scanner: DataView, cursor: number, len: number) {
  if (len <= 0) {
    return ""
  }
  // A typed-array view is bounded by the underlying buffer, not by the
  // DataView, so check explicitly to keep out-of-range reads an error.
  if (cursor < 0 || cursor + len > scanner.byteLength) {
    throw new RangeError(
      `Cannot read ${len} bytes at offset ${cursor} from a ${scanner.byteLength}-byte view`
    )
  }
  // Zero-copy window over the requested bytes; honours the view's own offset.
  const bytes = new Uint8Array(scanner.buffer, scanner.byteOffset + cursor, len)
  return strEncoder.decode(bytes)
}
/**
 * Prints a summary of an uncompressed PCM WAV file to stdout: channel count,
 * sample rate, byte rate, sample-frame layout, bits per sample and duration.
 *
 * The RIFF chunk list is walked instead of assuming the canonical 44-byte
 * header, so files with an extended `fmt ` chunk or with extra chunks
 * (`LIST`, `fact`, ...) ahead of `data` are handled.
 *
 * If the file is not a RIFF/RIFX WAVE file, is truncated or malformed, or is
 * not PCM encoded, a message is logged and the process exits with status 1.
 *
 * @param filePath Path of the WAV file to inspect.
 * @returns A promise that resolves once the summary has been printed.
 */
async function parse(filePath: string) {
  const fd = await open(filePath)
  // Scratch buffer: every read lands at offset 0, so field offsets below are
  // always relative to the start of the most recent read.
  const scanner = new DataView(new ArrayBuffer(1024))

  /* Canonical 44-byte PCM WAV header (positions are 1-based byte ranges)
     Positions  Sample value  Description
     1 - 4      "RIFF"        Marks the file as a RIFF file ("RIFX" = big-endian).
     5 - 8      (integer)     Size of the overall file minus 8 bytes (32-bit integer).
     9 - 12     "WAVE"        File type header.
     13 - 16    "fmt "        Format chunk marker. Includes the trailing space.
     17 - 20    16            Length of the format data that follows.
     21 - 22    1             Type of format (1 is PCM) - 2 byte integer.
     23 - 24    2             Number of channels - 2 byte integer.
     25 - 28    44100         Sample rate in Hz - 32 bit integer (44100 = CD, 48000 = DAT).
     29 - 32    176400        Byte rate: (SampleRate * BitsPerSample * Channels) / 8.
     33 - 34    4             Bytes per sample frame: (BitsPerSample * Channels) / 8.
                              1 = 8 bit mono, 2 = 8 bit stereo / 16 bit mono, 4 = 16 bit stereo.
     35 - 36    16            Bits per sample.
     37 - 40    "data"        "data" chunk header. Marks the beginning of the data section.
     41 - 44    (integer)     Size of the data section in bytes.
     Real files do not always follow this exact layout, hence the chunk walk below.
  */

  // Reads exactly `length` bytes from file offset `position` into the start of
  // `scanner`. Returns false on a short read (end of file).
  const readAt = async (position: number, length: number): Promise<boolean> => {
    const { bytesRead } = await fd.read(scanner, 0, length, position)
    return bytesRead === length
  }

  // Prints the summary. Returns undefined on success, or the message to report
  // when the file cannot be handled.
  const describe = async (): Promise<
    'Unsupported file' | 'Unsupported audio encoding' | undefined
  > => {
    console.log('File:', filePath)

    // RIFF header: magic (4 bytes), overall size (4, unused here), form type (4).
    if (!(await readAt(0, 12))) {
      return 'Unsupported file'
    }
    const magic = readString(scanner, 0, 4)
    if (magic !== 'RIFF' && magic !== 'RIFX') {
      return 'Unsupported file'
    }
    // "RIFX" stores all multi-byte integers big-endian.
    const littleEndian = magic === 'RIFF'
    if (readString(scanner, 8, 4) !== 'WAVE') {
      return 'Unsupported file'
    }

    let format: { sampleRate: number; bytesPerSampleFrame: number } | undefined
    let position = 12
    for (;;) {
      // Chunk header: four-character id followed by the 32-bit body size.
      if (!(await readAt(position, 8))) {
        // Reached the end of the file without finding a "data" chunk.
        return 'Unsupported file'
      }
      const chunkId = readString(scanner, 0, 4)
      const chunkSize = scanner.getUint32(4, littleEndian)
      position += 8

      if (chunkId === 'fmt ') {
        // Only the 16 mandatory bytes are needed; any extension is skipped.
        if (chunkSize < 16 || !(await readAt(position, 16))) {
          return 'Unsupported file'
        }
        const type = scanner.getUint16(0, littleEndian)
        if (type !== 1) {
          return 'Unsupported audio encoding'
        }
        console.log('Type: PCM')
        const channels = scanner.getUint16(2, littleEndian)
        console.log('Channels:', channels)
        const sampleRate = scanner.getUint32(4, littleEndian)
        console.log('Sample rate:', sampleRate)
        // This field is the byte rate; the 'Frame size:' label is kept as-is so
        // that the script's output format does not change.
        const byteRate = scanner.getUint32(8, littleEndian)
        console.log('Frame size:', byteRate)
        const bytesPerSampleFrame = scanner.getUint16(12, littleEndian)
        const layouts: Record<number, string | undefined> = {
          1: '8 bit mono',
          2: '8 bit stereo/16 bit mono',
          4: '16 bit stereo',
          6: '24 bit stereo',
        }
        console.log(
          'Audio:',
          layouts[bytesPerSampleFrame] ?? `${bytesPerSampleFrame} bytes per sample frame`
        )
        const bitsPerSample = scanner.getUint16(14, littleEndian)
        console.log('Bits per sample:', bitsPerSample)
        format = { sampleRate, bytesPerSampleFrame }
      } else if (chunkId === 'data') {
        // The duration needs the format fields, and both divisors must be
        // non-zero for the result to be meaningful.
        if (
          format === undefined ||
          format.bytesPerSampleFrame === 0 ||
          format.sampleRate === 0
        ) {
          return 'Unsupported file'
        }
        const sampleCount = chunkSize / format.bytesPerSampleFrame
        console.log('Duration:', sampleCount / format.sampleRate, 'seconds')
        // From here until the end of the chunk, every `bytesPerSampleFrame`
        // bytes of data contains one frame of audio (one sample for all
        // channels). We could read them in, average their amplitudes over a
        // determined sample count, and use this data to draw a waveform.
        return undefined
      }

      // Chunk bodies are padded to an even number of bytes.
      position += chunkSize + (chunkSize % 2)
    }
  }

  let failure: 'Unsupported file' | 'Unsupported audio encoding' | undefined
  try {
    failure = await describe()
  } finally {
    // Always release the file handle, including when a read throws.
    await fd.close()
  }
  if (failure !== undefined) {
    console.log(failure, filePath)
    process.exit(1)
  }
}
// Script entry point: expects the path of a WAV file as the only argument.
const wavPath = process.argv[2]
if (wavPath === undefined) {
  console.log(`Usage: ${process.argv[1]} <file.wav>`)
  // Stop here; there is no file to parse.
  process.exit(1)
}
// Report I/O failures (missing file, permissions, ...) instead of leaving the
// promise rejection unhandled.
parse(wavPath).catch((err: unknown) => {
  console.error(err instanceof Error ? err.message : err)
  process.exit(1)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment