Skip to content

Instantly share code, notes, and snippets.

@wuriyanto48
Created September 27, 2021 02:47
Show Gist options
  • Select an option

  • Save wuriyanto48/b28763082123831b7a7cc66a9686d1b7 to your computer and use it in GitHub Desktop.

Select an option

Save wuriyanto48/b28763082123831b7a7cc66a9686d1b7 to your computer and use it in GitHub Desktop.
Rust UTF-8 Encoding
/// Exclusive upper bound of the code points that fit in a single UTF-8 byte (2^7).
const MAX_ONE_BYTE: u32 = 1 << 7;
/// Exclusive upper bound of the code points that fit in two UTF-8 bytes (2^11).
const MAX_TWO_BYTE: u32 = 1 << 11;
/// Exclusive upper bound of the code points that fit in three UTF-8 bytes (2^16).
const MAX_THREE_BYTE: u32 = 1 << 16;
/// Keeps the low six bits of a value: the payload of one continuation byte.
const MASK: u32 = 0b0011_1111;
/// Tag bits `10xxxxxx` OR-ed into every continuation byte.
const CONTINUATION_MASK: u32 = 0b1000_0000;
/// Tag bits `110xxxxx` OR-ed into the lead byte of a two-byte sequence.
const TWO_BYTE_MASK: u32 = 0b1100_0000;
/// Tag bits `1110xxxx` OR-ed into the lead byte of a three-byte sequence.
const THREE_BYTE_MASK: u32 = 0b1110_0000;
/// Tag bits `11110xxx` OR-ed into the lead byte of a four-byte sequence.
const FOUR_BYTE_MASK: u32 = 0b1111_0000;
/// Appends the UTF-8 encoding of `c` to `out`.
///
/// The code point is compared against the one-, two- and three-byte limits to
/// pick the sequence length. The lead byte carries the length tag plus the
/// highest payload bits; each following byte carries six payload bits behind
/// the continuation tag.
///
/// # Errors
///
/// No path in this function produces an `Err`; it always returns `Ok(())`.
/// The `Result` return type is part of the existing signature.
fn encode_utf8(c: char, out: &mut Vec<u8>) -> Result<(), String> {
    let code_point = c as u32;

    // Builds one continuation byte from the six bits located `shift` bits up.
    let trailing = |shift: u32| (((code_point >> shift) & MASK) | CONTINUATION_MASK) as u8;

    if code_point < MAX_ONE_BYTE {
        // ASCII range: the code point is its own encoding.
        out.push(code_point as u8);
    } else if code_point < MAX_TWO_BYTE {
        out.extend_from_slice(&[
            ((code_point >> 6) | TWO_BYTE_MASK) as u8,
            trailing(0),
        ]);
    } else if code_point < MAX_THREE_BYTE {
        out.extend_from_slice(&[
            ((code_point >> 12) | THREE_BYTE_MASK) as u8,
            trailing(6),
            trailing(0),
        ]);
    } else {
        // Everything at or above the three-byte limit takes four bytes.
        out.extend_from_slice(&[
            ((code_point >> 18) | FOUR_BYTE_MASK) as u8,
            trailing(12),
            trailing(6),
            trailing(0),
        ]);
    }

    Ok(())
}
/// Demo entry point: encodes the rose emoji with `encode_utf8` and prints the
/// resulting bytes in `Debug` form.
///
/// If encoding reports an error, the message is printed and the process exits
/// with status 1.
fn main() {
    // Other sample inputs to try here: 'Ʃ' (encodes to two bytes) and
    // '✪' (encodes to three bytes). The rose emoji encodes to four.
    let rose_emoji = '🌹';

    let mut res: Vec<u8> = Vec::new();
    if let Err(e) = encode_utf8(rose_emoji, &mut res) {
        println!("{}", e);
        std::process::exit(1);
    }

    println!("{:?}", res);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment