Skip to content

Instantly share code, notes, and snippets.

@siritori
Created December 9, 2021 22:37
Show Gist options
  • Save siritori/e4e2dc4ec2076f075f17f9e56560d8ed to your computer and use it in GitHub Desktop.
Save siritori/e4e2dc4ec2076f075f17f9e56560d8ed to your computer and use it in GitHub Desktop.
Binary parsing test
use bytes::Buf;
/// A type that can be decoded from a [`BinaryReader`].
///
/// Implementors pull whatever bytes they need from `reader` and return
/// `Some(value)` on success or `None` when the input cannot be decoded.
pub trait BinaryRead: Sized {
/// Decodes one `Self` from `reader`, returning `None` on failure.
fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self>;
}
/// Thin wrapper around a `bytes::Buf` that exposes `Option`-returning read helpers.
pub struct BinaryReader<T: bytes::Buf> {
/// The underlying buffer that the read methods operate on.
pub buf: T,
}
/// Decodes a stream of UTF-16 code units into an owned `String`.
///
/// `len_u16` is only a capacity hint (the expected number of code units).
/// Returns `None` as soon as an unpaired surrogate is encountered.
fn read_string_from_u16_iter(len_u16: usize, iter: impl Iterator<Item = u16>) -> Option<String> {
    let decoded = String::with_capacity(len_u16 * 2);
    std::char::decode_utf16(iter).try_fold(decoded, |mut acc, unit| {
        acc.push(unit.ok()?);
        Some(acc)
    })
}
impl<T: bytes::Buf> BinaryReader<T> {
    /// Wraps `buf` in a reader.
    pub fn new(buf: T) -> Self {
        BinaryReader { buf }
    }

    /// Returns `Some(())` when at least `required_size` bytes are left in the buffer.
    fn check_remaining(&self, required_size: usize) -> Option<()> {
        if self.buf.remaining() >= required_size {
            Some(())
        } else {
            None
        }
    }

    /// Shared body of the null-terminated UTF-16 readers.
    ///
    /// `decode` turns one two-byte pair into a code unit (big- or little-endian).
    /// Only the current contiguous chunk is scanned. If no zero code unit is
    /// found, every complete code unit in the chunk is treated as the string.
    /// On success the string and its terminator (when present) are consumed;
    /// on a decoding error nothing is consumed.
    fn utf16_until_nul(&mut self, decode: fn([u8; 2]) -> u16) -> Option<String> {
        let chunk = self.buf.chunk();
        let available_units = chunk.len() / 2;
        let units: Vec<u16> = chunk
            .chunks_exact(2)
            .map(|pair| decode([pair[0], pair[1]]))
            .take_while(|&unit| unit != 0)
            .collect();
        let text = read_string_from_u16_iter(units.len(), units.iter().copied())?;
        // `take_while` stopped early only if a terminator was actually seen.
        let terminator_units = usize::from(units.len() < available_units);
        self.buf.advance((units.len() + terminator_units) * 2);
        Some(text)
    }

    /// Shared body of the fixed-length UTF-16 readers.
    ///
    /// Reads exactly `len_u16` code units using `decode` for the byte order.
    /// Returns `None` (consuming nothing) when the byte length overflows, when
    /// the current contiguous chunk is too short, or when decoding fails.
    fn utf16_exact(&mut self, len_u16: usize, decode: fn([u8; 2]) -> u16) -> Option<String> {
        let byte_len = len_u16.checked_mul(std::mem::size_of::<u16>())?;
        // `get` instead of indexing: the first chunk of a non-contiguous
        // buffer may be shorter than `remaining()`, which would panic.
        let bytes = self.buf.chunk().get(..byte_len)?;
        let units = bytes.chunks_exact(2).map(|pair| decode([pair[0], pair[1]]));
        let text = read_string_from_u16_iter(len_u16, units)?;
        self.buf.advance(byte_len);
        Some(text)
    }

    /// Consumes `symbol` if the buffer starts with exactly those bytes.
    ///
    /// Returns `None` and leaves the buffer untouched on a mismatch or when
    /// fewer than `symbol.len()` bytes remain.
    pub fn symbol(&mut self, symbol: &[u8]) -> Option<()> {
        self.check_remaining(symbol.len())?;
        if self.buf.chunk().starts_with(symbol) {
            self.buf.advance(symbol.len());
            Some(())
        } else {
            None
        }
    }

    /// Reads `len_u8` bytes as UTF-8 and consumes them.
    ///
    /// Returns `None` (consuming nothing) when the current contiguous chunk
    /// holds fewer than `len_u8` bytes or the bytes are not valid UTF-8.
    pub fn sized_utf8_str(&mut self, len_u8: usize) -> Option<String> {
        let text = std::str::from_utf8(self.buf.chunk().get(..len_u8)?)
            .ok()?
            .to_owned();
        self.buf.advance(len_u8);
        Some(text)
    }

    /// Reads a UTF-8 string terminated by a `0` byte and consumes both the
    /// string and its terminator.
    ///
    /// Returns `None` (consuming nothing) when the current contiguous chunk
    /// contains no terminator or the bytes before it are not valid UTF-8.
    pub fn null_terminated_utf8_str(&mut self) -> Option<String> {
        let chunk = self.buf.chunk();
        let str_len = chunk.iter().position(|&byte| byte == b'\0')?;
        let text = std::str::from_utf8(&chunk[..str_len]).ok()?.to_owned();
        self.buf.advance(str_len + 1);
        Some(text)
    }

    /// Reads a big-endian UTF-16 string up to a zero code unit and consumes it
    /// together with the terminator.
    ///
    /// If no terminator exists in the current chunk, all complete code units
    /// are taken as the string. Returns `None` on invalid UTF-16.
    pub fn null_terminated_utf16_str(&mut self) -> Option<String> {
        self.utf16_until_nul(u16::from_be_bytes)
    }

    /// Little-endian counterpart of [`Self::null_terminated_utf16_str`].
    pub fn null_terminated_utf16le_str(&mut self) -> Option<String> {
        self.utf16_until_nul(u16::from_le_bytes)
    }

    /// Reads exactly `len_u16` big-endian UTF-16 code units and consumes them.
    ///
    /// Returns `None` (consuming nothing) when not enough contiguous bytes are
    /// available or the code units are not valid UTF-16.
    pub fn sized_utf16_str(&mut self, len_u16: usize) -> Option<String> {
        self.utf16_exact(len_u16, u16::from_be_bytes)
    }

    /// Little-endian counterpart of [`Self::sized_utf16_str`].
    pub fn sized_utf16le_str(&mut self, len_u16: usize) -> Option<String> {
        self.utf16_exact(len_u16, u16::from_le_bytes)
    }

    /// Reads one unsigned byte, or `None` if the buffer is exhausted.
    pub fn u8(&mut self) -> Option<u8> {
        self.check_remaining(std::mem::size_of::<u8>())?;
        Some(self.buf.get_u8())
    }

    /// Reads one signed byte, or `None` if the buffer is exhausted.
    pub fn i8(&mut self) -> Option<i8> {
        self.check_remaining(std::mem::size_of::<i8>())?;
        Some(self.buf.get_i8())
    }

    /// Reads a big-endian `u16`, or `None` if fewer than two bytes remain.
    pub fn u16(&mut self) -> Option<u16> {
        self.check_remaining(std::mem::size_of::<u16>())?;
        Some(self.buf.get_u16())
    }

    /// Reads a little-endian `u16`, or `None` if fewer than two bytes remain.
    pub fn u16le(&mut self) -> Option<u16> {
        self.check_remaining(std::mem::size_of::<u16>())?;
        Some(self.buf.get_u16_le())
    }

    /// Reads a big-endian `i16`, or `None` if fewer than two bytes remain.
    pub fn i16(&mut self) -> Option<i16> {
        self.check_remaining(std::mem::size_of::<i16>())?;
        Some(self.buf.get_i16())
    }

    /// Reads a little-endian `i16`, or `None` if fewer than two bytes remain.
    pub fn i16le(&mut self) -> Option<i16> {
        self.check_remaining(std::mem::size_of::<i16>())?;
        Some(self.buf.get_i16_le())
    }

    /// Reads a big-endian `u32`, or `None` if fewer than four bytes remain.
    pub fn u32(&mut self) -> Option<u32> {
        self.check_remaining(std::mem::size_of::<u32>())?;
        Some(self.buf.get_u32())
    }

    /// Reads a little-endian `u32`, or `None` if fewer than four bytes remain.
    pub fn u32le(&mut self) -> Option<u32> {
        self.check_remaining(std::mem::size_of::<u32>())?;
        Some(self.buf.get_u32_le())
    }

    /// Decodes any [`BinaryRead`] type from this reader.
    pub fn read<U: BinaryRead>(&mut self) -> Option<U> {
        U::read(self)
    }

    /// Decodes a `U` and succeeds only when it equals `expected`.
    ///
    /// Whatever `U::read` consumed stays consumed even when the comparison fails.
    pub fn skip_read<U: BinaryRead + std::cmp::PartialEq>(&mut self, expected: U) -> Option<()> {
        if U::read(self)? == expected {
            Some(())
        } else {
            None
        }
    }
}
/// The format field of a MIDI header, decoded from a 16-bit value.
#[derive(Debug)]
enum MidiFormat {
/// Encoded as `0x0000`.
Format0,
/// Encoded as `0x0001`.
Format1,
/// Encoded as `0x0002`.
Format2,
}
impl BinaryRead for MidiFormat {
    /// Reads a big-endian `u16` and maps 0, 1 and 2 to the matching variant.
    ///
    /// Returns `None` when the value cannot be read or is any other number.
    fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self> {
        let raw = reader.u16()?;
        match raw {
            0 => Some(MidiFormat::Format0),
            1 => Some(MidiFormat::Format1),
            2 => Some(MidiFormat::Format2),
            _ => None,
        }
    }
}
/// Fields decoded from an `MThd` header chunk.
#[derive(Debug)]
struct MidiHeader {
/// File format value (first 16-bit field after the chunk length).
format: MidiFormat,
/// Second 16-bit field of the header.
num_tracks: u16,
/// Third 16-bit field of the header.
temporal_resolution: u16,
}
impl BinaryRead for MidiHeader {
    /// Parses an `MThd` header chunk.
    ///
    /// Expects the literal tag `MThd`, a big-endian `u32` chunk length equal
    /// to 6, then the format, track count and temporal resolution as
    /// big-endian 16-bit values. Returns `None` if any part is missing or
    /// does not match.
    fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self> {
        reader.symbol(b"MThd")?;
        // The header payload is three u16 fields, so the declared length must
        // be 6. The previous check was inverted and rejected exactly that value.
        if reader.u32()? != 6 {
            return None;
        }
        let format = reader.read()?;
        let num_tracks = reader.u16()?;
        let temporal_resolution = reader.u16()?;
        Some(MidiHeader {
            format,
            num_tracks,
            temporal_resolution,
        })
    }
}
/// Demo driver: decodes a few sample byte strings and prints each result,
/// or `failure` when decoding returns `None`.
fn main() {
    // Prints the decoded text; the fallback string is only built when needed.
    let show = |decoded: Option<String>| {
        println!("{}", decoded.unwrap_or_else(|| "failure".to_string()));
    };

    // Big-endian UTF-16 sample followed by three trailing bytes.
    let utf16_be: Vec<u8> = vec![
        0x30, 0x42, 0x30, 0x44, 0x30, 0x46, 0x30, 0x48, 0x30, 0x4a, 0x00, 0xFF, 0xFF,
    ];
    show(BinaryReader::new(&utf16_be[..]).sized_utf16_str(5));
    show(BinaryReader::new(&utf16_be[..]).null_terminated_utf16_str());

    // The same five code units in little-endian byte order.
    let utf16_le: Vec<u8> = vec![0x42, 0x30, 0x44, 0x30, 0x46, 0x30, 0x48, 0x30, 0x4a, 0x30];
    show(BinaryReader::new(&utf16_le[..]).sized_utf16le_str(5));

    // A 17-byte UTF-8 sample.
    let utf8: Vec<u8> = vec![
        230, 150, 135, 229, 173, 151, 229, 136, 151, 40, 85, 84, 70, 45, 49, 54, 41,
    ];
    show(BinaryReader::new(&utf8[..]).sized_utf8_str(17));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment