Skip to content

Instantly share code, notes, and snippets.

@camas
Last active July 1, 2021 12:22
Show Gist options
  • Save camas/80113751bcdbb7c8b284d505efacf8d4 to your computer and use it in GitHub Desktop.
Save camas/80113751bcdbb7c8b284d505efacf8d4 to your computer and use it in GitHub Desktop.
Read a single UTF-8 char using Rust
/// Reads one UTF-8 encoded character from `data`.
///
/// The first byte selects the sequence length (one to four bytes); the payload bits of the
/// lead byte and of each continuation byte are concatenated, most significant first, to form
/// the code point. See <https://en.wikipedia.org/wiki/UTF-8#Encoding> for the bit layout.
///
/// # Panics
///
/// Panics if the bytes at the current position are not well-formed UTF-8:
///
/// - the first byte is a continuation byte (`0x80..=0xBF`) or lies in `0xF8..=0xFF`,
/// - a byte after the lead byte is not a continuation byte (`10xx_xxxx`),
/// - the sequence is an overlong encoding of a smaller code point,
/// - the decoded value is a surrogate or is greater than `U+10FFFF`.
///
/// Also panics, inside `read_u8`, if the reader fails or ends in the middle of a sequence.
fn read_char<T: Read>(data: &mut T) -> char {
    let lead = read_u8(data);

    // For each multi-byte form: the payload bits carried by the lead byte, the number of
    // continuation bytes that must follow, and the smallest code point that genuinely needs
    // this many bytes (anything below it is an overlong encoding).
    let (mut value, continuation_count, min_value) = match lead {
        0x00..=0x7F => return char::from(lead),
        0xC0..=0xDF => (u32::from(lead & 0b0001_1111), 1, 0x80),
        0xE0..=0xEF => (u32::from(lead & 0b0000_1111), 2, 0x800),
        0xF0..=0xF7 => (u32::from(lead & 0b0000_0111), 3, 0x1_0000),
        // Continuation bytes and 0xF8..=0xFF can never start a sequence; decoding them as
        // if they were single-byte characters would silently produce the wrong text.
        _ => panic!("invalid UTF-8: {:#04x} cannot start a sequence", lead),
    };

    for _ in 0..continuation_count {
        let byte = read_u8(data);
        assert!(
            byte & 0b1100_0000 == 0b1000_0000,
            "invalid UTF-8: expected a continuation byte, found {:#04x}",
            byte
        );
        value = (value << 6) | u32::from(byte & 0b0011_1111);
    }

    assert!(
        value >= min_value,
        "invalid UTF-8: overlong encoding of U+{:04X}",
        value
    );

    // `from_u32` rejects surrogates and values above U+10FFFF.
    char::from_u32(value).expect("invalid UTF-8: surrogate or out-of-range code point")
}
/// Pulls exactly one byte out of `data` and returns it.
///
/// # Panics
///
/// Panics if `read_exact` reports an error, which includes the reader having no byte left.
fn read_u8<T: Read>(data: &mut T) -> u8 {
    let mut single = [0u8];
    data.read_exact(&mut single).unwrap();
    let [byte] = single;
    byte
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes the first character of `bytes` through a slice reader.
    fn decode(bytes: &[u8]) -> char {
        let mut reader = bytes;
        read_char(&mut reader)
    }

    #[test]
    fn test_char() {
        // Four-byte sequence.
        let data = vec![0xF0, 0x9F, 0x99, 0x82];
        assert_eq!(read_char(&mut &data[..]), '🙂');
    }

    #[test]
    fn test_char_shorter_sequences() {
        // One-, two- and three-byte sequences take different decoding branches.
        assert_eq!(decode(&[0x41]), 'A');
        assert_eq!(decode(&[0xC3, 0xA9]), '\u{e9}');
        assert_eq!(decode(&[0xE2, 0x82, 0xAC]), '\u{20ac}');
    }

    #[test]
    fn test_char_consumes_one_sequence() {
        // Bytes after the first character must stay in the reader.
        let data = [0xC3u8, 0xA9, 0x21];
        let mut reader = &data[..];
        assert_eq!(read_char(&mut reader), '\u{e9}');
        assert_eq!(reader, &[0x21u8][..]);
        assert_eq!(read_char(&mut reader), '!');
        assert!(reader.is_empty());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment