Skip to content

Instantly share code, notes, and snippets.

@grey-area
Created November 30, 2022 15:45
Show Gist options
  • Save grey-area/3954ae5611800452b7b3e7c7342cf28a to your computer and use it in GitHub Desktop.
Save grey-area/3954ae5611800452b7b3e7c7342cf28a to your computer and use it in GitHub Desktop.
import struct
from pathlib import Path
import wave
def parse_data(data):
    """Extract the raw audio sample bytes from one event packet.

    Packet layout, as consumed here:

    * a 24-byte header of six little-endian unsigned 32-bit ints:
      job_id, sequence_id, finished_flag, num_audio_bytes,
      num_face_bytes, num_timing_bytes;
    * a body holding three sections back to back: audio
      (num_audio_bytes long), face (num_face_bytes long), then timing.

    The audio section starts with its own 4-byte header, which is thrown
    away. What follows is a series of little-endian signed 16-bit samples;
    they are returned as raw bytes, not decoded. The face and timing
    sections are not needed yet and are ignored.

    Args:
        data: the complete packet as a bytes-like object.

    Returns:
        The audio sample bytes (same type as a slice of ``data``). If the
        packet is shorter than its header announces, whatever is present
        is returned; if the audio section is 4 bytes or shorter, the
        result is empty.

    Raises:
        struct.error: if ``data`` holds fewer than 24 bytes.
    """
    header_size = 24        # six 32-bit fields
    audio_header_size = 4   # per-section prefix that is discarded

    # Read the header in place; only the audio length is needed to locate
    # the samples, the other fields are named for documentation only.
    (_job_id, _sequence_id, _finished_flag, num_audio_bytes,
     _num_face_bytes, _num_timing_bytes) = struct.unpack_from('<6I', data)

    # Audio comes first in the body, so its bounds follow from the header
    # size and num_audio_bytes alone. When num_audio_bytes < 4 the slice
    # is empty because its end precedes its start.
    audio_start = header_size + audio_header_size
    audio_end = header_size + num_audio_bytes
    return data[audio_start:audio_end]
if __name__ == "__main__":
    # Gather the audio payload of each event file, in order. The pieces
    # are collected in a list and joined once, because growing a bytes
    # object with `+=` re-copies everything accumulated so far.
    audio_chunks = []
    for i in range(1, 5):
        filename = f'event_data ({i}).txt'
        # read the raw bytes from the file
        data = Path(filename).read_bytes()
        # parse out the audio data and queue it for concatenation
        audio_chunks.append(parse_data(data))
    audio_data = b''.join(audio_chunks)

    # write the combined audio data to a WAV file
    with wave.open('output.wav', 'wb') as f:
        f.setnchannels(1)      # single channel, mono
        f.setsampwidth(2)      # 2 bytes per sample, i.e. 16 bits
        f.setframerate(22050)  # 22050 samples per second
        f.writeframes(audio_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment