Skip to content

Instantly share code, notes, and snippets.

@mdchaney
Last active January 10, 2024 00:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdchaney/5fa8f026b8bcabe518880f849b3680c2 to your computer and use it in GitHub Desktop.
Save mdchaney/5fa8f026b8bcabe518880f849b3680c2 to your computer and use it in GitHub Desktop.
Original audio_time.rb
# Utilities for inspecting AIFF, WAV and MP3 files: each *_info method
# returns a hash describing the file (always including :valid and
# :filename, plus :error when invalid), the *_time methods return the
# duration in seconds, and the *_lint methods return the :valid flag.
module AudioTime
  # AIFF stores the samples per second as an IEEE 80-bit extended float,
  # big-endian: 1 sign bit, 15 exponent bits (bias 16383) and a 64-bit
  # mantissa whose top bit is the explicit integer bit.
  #
  # Returns the magnitude rounded to the nearest integer (halves round up).
  # The sign bit is ignored. Returns 0 for nil/short input, a zero
  # mantissa, or an infinity/NaN exponent.
  def AudioTime.ieee80_to_long(raw_number)
    return 0 if raw_number.nil? || raw_number.bytesize < 10
    sign_and_exponent, mantissa = raw_number.unpack('n Q>')
    biased_exponent = sign_and_exponent & 0x7FFF
    return 0 if mantissa.zero? || biased_exponent == 0x7FFF
    # value = mantissa * 2**(exponent - 63), so shift right by 63 - exponent.
    shift = 63 - (biased_exponent - 16383)
    # Anything below 0.5 rounds to zero.
    return 0 if shift > 64
    return mantissa << -shift if shift <= 0
    # Adding half of the discarded range before shifting rounds half up.
    (mantissa + (1 << (shift - 1))) >> shift
  end

  # Reads at most +limit+ bytes from the start of the chunk body at the
  # current file position, then positions the file just past the chunk and
  # its pad byte (chunks with an odd length are followed by one pad byte).
  # Returns the bytes read (nil if the file ended before the chunk body).
  def AudioTime.read_chunk_prefix(file, chunk_length, limit)
    body_start = file.pos
    data = file.read([chunk_length, limit].min)
    file.seek(body_start + chunk_length + (chunk_length.odd? ? 1 : 0), IO::SEEK_SET)
    data
  end
  private_class_method :read_chunk_prefix

  # Size the file should have given the chunks found: a 12-byte file
  # header, plus for each chunk an 8-byte header, its body, and a pad byte
  # when the body length is odd.
  def AudioTime.expected_size_for_chunks(chunk_info)
    chunk_info.inject(12) do |total, ci|
      total + 8 + ci[:length] + (ci[:length].odd? ? 1 : 0)
    end
  end
  private_class_method :expected_size_for_chunks

  # Parses the first 18 bytes of an AIFF COMM chunk body.
  #
  # Returns a hash with :number_of_channels, :number_of_sample_frames,
  # :sample_size, :bits_per_sample, :sample_rate and :bytes_per_sample, or
  # nil when fewer than 18 bytes are supplied.
  def AudioTime.parse_aiff_comm_chunk(raw_comm_chunk)
    return nil if raw_comm_chunk.nil? || raw_comm_chunk.bytesize < 18
    number_of_channels, number_of_sample_frames, sample_size, raw_sample_rate = raw_comm_chunk.unpack('n N n a10')
    {
      number_of_channels: number_of_channels,
      number_of_sample_frames: number_of_sample_frames,
      sample_size: sample_size,
      bits_per_sample: sample_size,
      sample_rate: ieee80_to_long(raw_sample_rate),
      # Each sample point occupies the smallest whole number of bytes that
      # can hold sample_size bits, so round up rather than down.
      bytes_per_sample: (sample_size + 7) / 8
    }
  end

  # Reads the chunk structure of an AIFF/AIFC file.
  #
  # Returns the COMM information merged with :format, :seconds,
  # :average_bytes_per_second, :data_length, :chunk_info, :valid and
  # :filename on success; otherwise a hash with :error and valid: false.
  def AudioTime.aiff_info(filename)
    comm_info = data_length = actual_file_size = nil
    chunk_info = []
    File.open(filename, 'rb') do |file|
      raw_header = file.read(12)
      if !raw_header || raw_header.size < 12
        return { error: "Not an AIFF", valid: false, filename: filename }
      end
      form, filelen, aiff = raw_header.unpack('A4NA4')
      unless form == 'FORM' && aiff =~ /\AAIF[FC]\z/
        return { error: "Not an AIFF", valid: false, filename: filename }
      end
      # The actual and expected file sizes are allowed to differ by 1.
      # Some files add a padding byte if the last chunk is odd length,
      # but don't add this to the file size in the header, causing
      # this minor inconsistency.
      actual_file_size = file.size
      if (filelen + 8 - actual_file_size).abs > 1
        return { error: "File size error for AIFF, header says #{filelen+8}, actual is #{actual_file_size}", valid: false, filename: filename }
      end
      # Read chunks, get the comm and ssnd chunks
      until file.eof?
        raw_chunk_header = file.read(8)
        # A truncated chunk header means we have run off the end.
        break unless raw_chunk_header && raw_chunk_header.size == 8
        chunk_type, chunk_length = raw_chunk_header.unpack('A4N')
        chunk_info.push({chunk_type: chunk_type, position: file.pos, length: chunk_length})
        if chunk_type == 'COMM'
          # Only the first 18 bytes are needed; never trust the header
          # length enough to read the whole chunk into memory.
          comm_info = parse_aiff_comm_chunk(read_chunk_prefix(file, chunk_length, 18))
        else
          data_length = chunk_length if chunk_type == 'SSND'
          file.seek(chunk_length + (chunk_length.odd? ? 1 : 0), IO::SEEK_CUR)
        end
      end
    end
    if data_length.nil? || comm_info.nil?
      return { error: "Malformed AIFF", valid: false, filename: filename }
    end
    # Make sure chunk lengths add up. The final pad byte may be missing.
    expected_size = expected_size_for_chunks(chunk_info)
    unless expected_size == actual_file_size || (expected_size - 1 == actual_file_size && chunk_info.last[:length].odd?)
      return { format: 'aif', error: "AIFF chunks total length is different than file size: #{expected_size} vs. #{actual_file_size}", valid: false, filename: filename }
    end
    average_bytes_per_second = comm_info[:bytes_per_sample] * comm_info[:number_of_channels] * comm_info[:sample_rate]
    if average_bytes_per_second.zero?
      return { format: 'aif', error: "AIFF has a zero sample rate, channel count or sample size", valid: false, filename: filename }
    end
    # The SSND chunk body starts with two 4-byte fields (offset and
    # blockSize) that are not sound data, so leave them out of the duration.
    sound_bytes = [data_length - 8, 0].max
    file_seconds = sound_bytes.to_f / average_bytes_per_second.to_f
    comm_info.merge({format: 'aif', seconds: file_seconds, average_bytes_per_second: average_bytes_per_second, data_length: data_length, valid: true, filename: filename, chunk_info: chunk_info })
  end

  # Parses a WAV "fmt " chunk body. Short chunks are zero-padded to the
  # 40 bytes of the extensible layout; bytes past 40 are ignored.
  #
  # Returns a hash of the format fields (see the keys below).
  def AudioTime.parse_wav_fmt_chunk(raw_fmt_chunk)
    raw = raw_fmt_chunk.to_s
    # Pad only when short: a negative repeat count would raise.
    raw += "\0" * (40 - raw.bytesize) if raw.bytesize < 40
    format_tag, number_of_channels, sample_rate, average_bytes_per_second, block_align, bits_per_sample, extended_size, valid_bits_per_sample, channel_mask, subformat = raw.unpack('v v V V v v v v V a16')
    # In this case, the format tag is the first two bytes of the subformat
    if extended_size == 22 && format_tag == 65534
      format_tag = subformat.unpack('v').first
    end
    bytes_per_sample = bits_per_sample / 8
    { format_tag: format_tag, number_of_channels: number_of_channels, sample_rate: sample_rate, average_bytes_per_second: average_bytes_per_second, block_align: block_align, bits_per_sample: bits_per_sample, bytes_per_sample: bytes_per_sample, has_extension: (extended_size > 0), valid_bits_per_sample: valid_bits_per_sample, channel_mask: channel_mask, subformat: subformat }
  end

  # Reads the chunk structure of a RIFF/WAVE file.
  #
  # Returns the fmt information merged with :format,
  # :number_of_sample_frames, :seconds, :data_length, :chunk_info, :valid
  # and :filename on success; otherwise a hash with :error and
  # valid: false.
  def AudioTime.wav_info(filename)
    fmt_info = data_length = actual_file_size = nil
    chunk_info = []
    File.open(filename, 'rb') do |file|
      raw_header = file.read(12)
      if !raw_header || raw_header.size < 12
        return { error: "Not a WAV file", valid: false, filename: filename }
      end
      riff, filelen, wave = raw_header.unpack('A4VA4')
      unless riff == 'RIFF' && wave == 'WAVE'
        return { error: "Not a WAV", valid: false, filename: filename }
      end
      # The actual and expected file sizes are allowed to differ by 1.
      # Some files add a padding byte if the last chunk is odd length,
      # but don't add this to the file size in the header, causing
      # this minor inconsistency.
      actual_file_size = file.size
      if (filelen + 8 - actual_file_size).abs > 1
        return { error: "File size error for WAV, header says #{filelen+8}, actual is #{actual_file_size}", valid: false, filename: filename }
      end
      # Read chunks, get the fmt and data chunks
      until file.eof?
        raw_chunk_header = file.read(8)
        # A truncated chunk header means we have run off the end.
        break unless raw_chunk_header && raw_chunk_header.size == 8
        # 'A4' strips the trailing space, so "fmt " arrives as "fmt".
        chunk_type, chunk_length = raw_chunk_header.unpack('A4V')
        chunk_info.push({chunk_type: chunk_type, position: file.pos, length: chunk_length})
        if chunk_type == 'fmt'
          # Only the first 40 bytes are parsed; skip the rest and any pad.
          fmt_info = parse_wav_fmt_chunk(read_chunk_prefix(file, chunk_length, 40))
        else
          data_length = chunk_length if chunk_type == 'data'
          # If the chunk length is odd, there should be a padding byte.
          file.seek(chunk_length + (chunk_length.odd? ? 1 : 0), IO::SEEK_CUR)
        end
      end
    end
    if data_length.nil? || fmt_info.nil?
      return { error: "Malformed WAV", valid: false, filename: filename }
    end
    # Make sure chunk lengths add up. The final pad byte may be missing.
    expected_size = expected_size_for_chunks(chunk_info)
    unless expected_size == actual_file_size || (expected_size - 1 == actual_file_size && chunk_info.last[:length].odd?)
      return { format: 'wav', error: "WAV chunks total length is different than file size: #{expected_size} vs. #{actual_file_size}", valid: false, filename: filename }
    end
    frame_size = fmt_info[:bytes_per_sample] * fmt_info[:number_of_channels]
    # Guard the divisions below (e.g. formats that report 0 bits per sample).
    if frame_size.zero? || fmt_info[:sample_rate].zero?
      return { format: 'wav', error: "WAV has a zero sample rate, channel count or sample size", valid: false, filename: filename }
    end
    number_of_sample_frames = data_length / frame_size
    file_seconds = number_of_sample_frames.to_f / fmt_info[:sample_rate].to_f
    fmt_info.merge({format: 'wav', number_of_sample_frames: number_of_sample_frames, seconds: file_seconds, data_length: data_length, valid: true, filename: filename, chunk_info: chunk_info })
  end

  # Parses a 10-byte ID3v2 header (or footer).
  #
  # Returns [id, major_version, minor_version, flags, size] where flags is
  # an 8-character bit string, least significant bit first, and size is
  # the decoded "synchsafe" integer: four bytes carrying 7 bits each, most
  # significant byte first.
  def AudioTime.parse_id3v2_header(header)
    id, major_version, minor_version, flags, *size_bytes = header.unpack('A3 C C b8 C4')
    # Plain arithmetic keeps the result independent of host byte order.
    size = size_bytes.inject(0) { |acc, byte| (acc << 7) | (byte & 0x7F) }
    [id, major_version, minor_version, flags, size]
  end

  # Lookup tables for MPEG audio frame header fields, keyed by the raw bit
  # strings taken from the header (or by decoded version/layer numbers).
  MP3_LOOKUPS = {
    mpeg_audio_version_descriptions: {
      '00' => 'MPEG Version 2.5',
      '01' => 'Reserved',
      '10' => 'MPEG Version 2 (ISO/IEC 13818-3)',
      '11' => 'MPEG Version 1 (ISO/IEC 11172-3)'
    },
    mpeg_audio_version_ids: {
      '00' => 2.5,
      '01' => nil,
      '10' => 2,
      '11' => 1
    },
    layer_descriptions: {
      '00' => 'Reserved',
      '01' => 'Layer III',
      '10' => 'Layer II',
      '11' => 'Layer I'
    },
    layers: {
      '00' => nil,
      '01' => 3,
      '10' => 2,
      '11' => 1
    },
    # Yeah, I know it's just binary, this is as easy as anything
    bitrate_indices: {
      '0000' => 0, '0001' => 1, '0010' => 2, '0011' => 3,
      '0100' => 4, '0101' => 5, '0110' => 6, '0111' => 7,
      '1000' => 8, '1001' => 9, '1010' => 10, '1011' => 11,
      '1100' => 12, '1101' => 13, '1110' => 14, '1111' => 15
    },
    # Lookup for bitrate is mpeg_audio_version_id, layer, and bitrate_index
    bitrates: {
      1 => {
        1 => [ 'free', 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, nil ],
        2 => [ 'free', 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, nil ],
        3 => [ 'free', 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, nil ]
      },
      2 => {
        1 => [ 'free', 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, nil ],
        2 => [ 'free', 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, nil ],
        3 => [ 'free', 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, nil ],
      },
      2.5 => {
        1 => [ 'free', 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, nil ],
        2 => [ 'free', 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, nil ],
        3 => [ 'free', 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, nil ],
      }
    },
    # Lookup for sampling_rate_frequencies is sampling_rate_frequency_index
    # and mpeg_audio_version_id
    sampling_rate_frequencies: {
      '00' => { 1 => 44100, 2 => 22050, 2.5 => 11025 },
      '01' => { 1 => 48000, 2 => 24000, 2.5 => 12000 },
      '10' => { 1 => 32000, 2 => 16000, 2.5 => 8000 },
      '11' => { 1 => nil, 2 => nil, 2.5 => nil }
    },
    channel_modes: {
      '00' => 'stereo',
      '01' => 'joint stereo (stereo)',
      '10' => 'dual channel (stereo)',
      '11' => 'single channel (mono)'
    },
    channel_counts: {
      '00' => 2,
      '01' => 2,
      '10' => 2,
      '11' => 1
    },
    emphases: {
      '00' => 'none',
      '01' => '50/15 ms',
      '10' => 'reserved',
      '11' => 'CCIT J.17'
    },
    # Lookup for samples_per_frame is mpeg_audio_version_id and layer
    samples_per_frame: {
      1 => { 1 => 384, 2 => 1152, 3 => 1152 },
      2 => { 1 => 384, 2 => 1152, 3 => 576 },
      2.5 => { 1 => 384, 2 => 1152, 3 => 576 }
    },
    # Lookup for mode extensions - layer I and II only
    mode_extensions: {
      '00' => { low_band: 4, high_band: 31 },
      '01' => { low_band: 8, high_band: 31 },
      '10' => { low_band: 12, high_band: 31 },
      '11' => { low_band: 16, high_band: 31 }
    }
  }.freeze

  # Parses a 4-byte MPEG audio frame header. Info for parsing is here:
  # http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
  # http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header
  #
  # Returns a hash of the raw bit fields and their decoded values
  # (including :bitrate in kbps and :frame_length in bytes), or nil when
  # the header cannot be used: bad frame sync, fewer than 4 bytes, a
  # reserved version/layer/sample rate, or a 'free'/invalid bitrate (the
  # frame length cannot be computed from the header in those cases).
  def AudioTime.parse_mp3_frame_header(frame_header)
    return nil if frame_header.nil? || frame_header.bytesize < 4
    # Fields are of these lengths in bits: 11, 2, 2, 1, 4, 2, 1, 1, 2, 2, 1, 1, 2
    frame_sync, raw_mpeg_audio_version_id, raw_layer_description, protection_bit, raw_bitrate_index, sampling_rate_frequency_index, padding_bit, private_bit, raw_channel_mode, raw_mode_extension, copyright_flag, original_flag, raw_emphasis = frame_header.unpack('B32').first.unpack('a11 a2 a2 a1 a4 a2 a1 a1 a2 a2 a1 a1 a2')
    return nil unless frame_sync == '11111111111' || frame_sync == '11111111110'
    has_crc = (protection_bit == '0')
    mpeg_audio_version_id = MP3_LOOKUPS[:mpeg_audio_version_ids][raw_mpeg_audio_version_id]
    mpeg_audio_version_description = MP3_LOOKUPS[:mpeg_audio_version_descriptions][raw_mpeg_audio_version_id]
    layer = MP3_LOOKUPS[:layers][raw_layer_description]
    layer_description = MP3_LOOKUPS[:layer_descriptions][raw_layer_description]
    # Reserved version or layer: the nested lookups below have no entry.
    return nil if mpeg_audio_version_id.nil? || layer.nil?
    bitrate_index = MP3_LOOKUPS[:bitrate_indices][raw_bitrate_index]
    bitrate = MP3_LOOKUPS[:bitrates][mpeg_audio_version_id][layer][bitrate_index]
    sampling_rate_frequency = MP3_LOOKUPS[:sampling_rate_frequencies][sampling_rate_frequency_index][mpeg_audio_version_id]
    # 'free' and the invalid index 15 give no usable number; same for the
    # reserved sampling rate.
    return nil unless bitrate.is_a?(Integer) && sampling_rate_frequency
    samples_per_frame = MP3_LOOKUPS[:samples_per_frame][mpeg_audio_version_id][layer]
    channel_mode = MP3_LOOKUPS[:channel_modes][raw_channel_mode]
    channel_count = MP3_LOOKUPS[:channel_counts][raw_channel_mode]
    emphasis = MP3_LOOKUPS[:emphases][raw_emphasis]
    if layer == 3
      mode_extension = nil
      ms_stereo = raw_mode_extension[0]
      intensity_stereo = raw_mode_extension[1]
    else
      mode_extension = MP3_LOOKUPS[:mode_extensions][raw_mode_extension]
      intensity_stereo = ms_stereo = nil
    end
    bits_per_second = bitrate * 1000
    frame_length =
      if layer == 1
        # Layer I counts 4-byte slots: (12 * bitrate / rate + padding) * 4.
        ((samples_per_frame / 32) * bits_per_second / sampling_rate_frequency + padding_bit.to_i) * 4
      else
        # Layers II and III count 1-byte slots.
        (samples_per_frame / 8) * bits_per_second / sampling_rate_frequency + padding_bit.to_i
      end
    { frame_sync: frame_sync, raw_mpeg_audio_version_id: raw_mpeg_audio_version_id, raw_layer_description: raw_layer_description, protection_bit: protection_bit, raw_bitrate_index: raw_bitrate_index, sampling_rate_frequency_index: sampling_rate_frequency_index, padding_bit: padding_bit, private_bit: private_bit, raw_channel_mode: raw_channel_mode, raw_mode_extension: raw_mode_extension, copyright_flag: copyright_flag, original_flag: original_flag, raw_emphasis: raw_emphasis, mpeg_audio_version_id: mpeg_audio_version_id, mpeg_audio_version_description: mpeg_audio_version_description, layer_description: layer_description, has_crc: has_crc, sampling_rate_frequency: sampling_rate_frequency, channel_mode: channel_mode, channel_count: channel_count, emphasis: emphasis, frame_length: frame_length, bitrate: bitrate, layer: layer, mode_extension: mode_extension, ms_stereo: ms_stereo, intensity_stereo: intensity_stereo }
  end

  # Walks every frame of an MP3 file.
  #
  # Need to find all possible id3 tag sets - id3v2 at beginning and/or
  # end, and id3v1 at end. The idea is to separate out the actual MP3
  # data so we can read and handle the frames.
  #
  # Returns the first frame's header hash extended with :format,
  # :seconds, :total_seconds, :file_size_seconds, :cbr, bitrate range,
  # tag flags, :mp3_offset, :mp3_length, :valid and :filename; a handful
  # of descriptive fields are taken from the last frame read. On failure
  # returns a hash with :error and valid: false.
  def AudioTime.mp3_info(filename)
    File.open(filename, 'rb') do |file|
      begin
        mp3_offset = 0
        file_size = mp3_length = file.size
        has_v1 = has_pre_v2 = has_post_v2 = false
        chunk = file.read(10)
        if !chunk || chunk.size < 10
          return { error: "Not an MP3 file", valid: false, filename: filename }
        end
        if chunk[0..2] == 'ID3'
          _id, _major_version, _minor_version, flags, size = parse_id3v2_header(chunk)
          # The stored size excludes the 10-byte header, and also the
          # 10-byte footer that is present when flag bit 4 is set.
          tag_length = size + 10 + (flags[4] == '1' ? 10 : 0)
          mp3_offset += tag_length
          mp3_length -= tag_length
          has_pre_v2 = true
        end
        file.seek(-128, IO::SEEK_END)
        chunk = file.read(3)
        if !chunk || chunk.size < 3
          return { error: "Not an MP3 file", valid: false, filename: filename }
        end
        # This is a v1 tag at the end
        if chunk[0..2] == 'TAG'
          mp3_length -= 128
          has_v1 = true
        end
        # Check for v2 tag at the end if there wasn't a v2 tag at the
        # beginning
        unless has_pre_v2
          file.seek(has_v1 ? -138 : -10, IO::SEEK_END)
          chunk = file.read(10)
          if !chunk || chunk.size < 10
            return { error: "Not an MP3 file", valid: false, filename: filename }
          end
          if chunk[0..2] == '3DI'
            _id, _major_version, _minor_version, _flags, size = parse_id3v2_header(chunk)
            # An appended tag has both a header and this footer, neither
            # of which is counted in the stored size.
            mp3_length -= size + 20
            has_post_v2 = true
          end
        end
        # Sanity checks
        if mp3_offset < 0 || mp3_offset > file_size || mp3_length < 0
          return { error: "MP3 information is confused: offset #{mp3_offset} length #{mp3_length}", valid: false, filename: filename }
        end
        mp3_end_offset = mp3_offset + mp3_length
        # Bytes * 8 / kbps yields milliseconds; converted to seconds below.
        total_milliseconds = 0.0
        low_bitrate = high_bitrate = nil
        first_frame_info = last_frame_info = nil
        # At this point, we can parse the MP3 file starting at mp3_offset and
        # continuing for mp3_length bytes, checking each frame header.
        file.seek(mp3_offset, IO::SEEK_SET)
        until file.pos >= mp3_end_offset || file.eof?
          frame_header = file.read(4)
          if !frame_header || frame_header.size < 4
            return { error: "Invalid MP3 file", valid: false, filename: filename }
          end
          frame_info = parse_mp3_frame_header(frame_header)
          return { error: 'Invalid frame sync in MP3 file', valid: false, filename: filename } unless frame_info
          frame_info[:offset] = file.pos
          # TODO: Check CRC if present
          bitrate = frame_info[:bitrate]
          low_bitrate = bitrate if !low_bitrate || low_bitrate > bitrate
          high_bitrate = bitrate if !high_bitrate || high_bitrate < bitrate
          total_milliseconds += frame_info[:frame_length].to_f * 8.0 / bitrate.to_f
          first_frame_info ||= frame_info
          last_frame_info = frame_info
          # The 4 header bytes of this frame have already been consumed.
          file.seek(frame_info[:frame_length] - 4, IO::SEEK_CUR)
        end
        # A file holding only tags has no frame to describe.
        unless first_frame_info
          return { error: "No MP3 frames found", valid: false, filename: filename }
        end
        cbr = (low_bitrate == high_bitrate)
        # For constant bitrate the byte count gives the duration directly.
        file_size_seconds = cbr ? mp3_length.to_f * 8.0 / (low_bitrate.to_f * 1000.0) : nil
        mp3_info = first_frame_info
        mp3_info[:format] = 'mp3'
        mp3_info[:total_seconds] = total_milliseconds / 1000.0
        mp3_info[:file_size_seconds] = file_size_seconds
        mp3_info[:seconds] = mp3_info[:file_size_seconds] || mp3_info[:total_seconds]
        mp3_info[:cbr] = cbr
        mp3_info[:low_bitrate] = low_bitrate
        mp3_info[:high_bitrate] = high_bitrate
        # These descriptive fields report the last frame that was read.
        [:mpeg_audio_version_id, :mpeg_audio_version_description, :layer, :layer_description, :sampling_rate_frequency, :ms_stereo, :intensity_stereo, :channel_mode].each do |key|
          mp3_info[key] = last_frame_info[key]
        end
        mp3_info[:mp3_offset] = mp3_offset
        mp3_info[:mp3_length] = mp3_length
        mp3_info[:has_v1] = has_v1
        mp3_info[:has_pre_v2] = has_pre_v2
        mp3_info[:has_post_v2] = has_post_v2
        mp3_info[:valid] = true
        mp3_info[:filename] = filename
        return mp3_info
      rescue Errno::EINVAL
        # Invalid seek (e.g. the file is shorter than a tag we look for)
        return { error: "Cannot seek in MP3 file", valid: false, filename: filename }
      end
    end
  end

  # Duration of an AIFF file in seconds (nil when the file is invalid).
  def AudioTime.aiff_time(filename)
    aiff_info(filename)[:seconds]
  end

  # Duration of a WAV file in seconds (nil when the file is invalid).
  def AudioTime.wav_time(filename)
    wav_info(filename)[:seconds]
  end

  # Duration of an MP3 file in seconds (nil when the file is invalid).
  def AudioTime.mp3_time(filename)
    mp3_info(filename)[:seconds]
  end

  # True when the file parses as a consistent AIFF, false otherwise.
  def AudioTime.aiff_lint(filename)
    aiff_info(filename)[:valid]
  end

  # True when the file parses as a consistent WAV, false otherwise.
  def AudioTime.wav_lint(filename)
    wav_info(filename)[:valid]
  end

  # True when the file parses as a consistent MP3, false otherwise.
  def AudioTime.mp3_lint(filename)
    mp3_info(filename)[:valid]
  end

  # Checks a file as the given type (an extension such as "wav" or
  # ".aiff", case-insensitive), or sniffs the type from the first 12
  # bytes when no type is given.
  #
  # Returns true/false, or nil when +type+ is given but not recognized.
  def AudioTime.valid_audio_file?(filename, type=nil)
    if type
      case type.to_s.gsub(/\A\./, '').downcase
      when 'aif', 'aiff' then aiff_lint(filename)
      when 'wav' then wav_lint(filename)
      when 'mp3' then mp3_lint(filename)
      end
    else
      raw_header = File.open(filename, 'rb') { |file| file.read(12) }
      # The /m flag lets "." match a 0x0A byte inside the 4-byte length.
      if !raw_header || raw_header.size < 12
        false
      elsif raw_header =~ /\AFORM.{4}AIF[FC]/m
        aiff_lint(filename)
      elsif raw_header =~ /\ARIFF.{4}WAVE/m
        wav_lint(filename)
      else
        # MP3s are difficult to easily determine
        mp3_lint(filename)
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment