Created
September 27, 2021 02:47
-
-
Save wuriyanto48/b28763082123831b7a7cc66a9686d1b7 to your computer and use it in GitHub Desktop.
Rust UTF-8 Encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Exclusive upper bound of the code points `encode_utf8` emits as one byte.
const MAX_ONE_BYTE: u32 = 0x80; // 128
/// Exclusive upper bound of the code points `encode_utf8` emits as two bytes.
const MAX_TWO_BYTE: u32 = 0x800; // 2048
/// Exclusive upper bound of the code points `encode_utf8` emits as three bytes.
const MAX_THREE_BYTE: u32 = 0x10000; // 65536

/// Keeps the low six bits of a value: the payload carried by one trailing byte.
const MASK: u32 = 0x3F; // 63, 0b0011_1111

/// High bits OR-ed onto every trailing (non-leading) byte of a sequence.
const CONTINUATION_MASK: u32 = 0x80; // 128, 0b1000_0000
/// High bits OR-ed onto the leading byte of a two-byte sequence.
const TWO_BYTE_MASK: u32 = 0xC0; // 192, 0b1100_0000
/// High bits OR-ed onto the leading byte of a three-byte sequence.
const THREE_BYTE_MASK: u32 = 0xE0; // 224, 0b1110_0000
/// High bits OR-ed onto the leading byte of a four-byte sequence.
const FOUR_BYTE_MASK: u32 = 0xF0; // 240, 0b1111_0000
| fn encode_utf8(c: char, out: &mut Vec<u8>) -> Result<(), String> { | |
| let c_decimal: u32 = c as u32; | |
| if c_decimal < MAX_ONE_BYTE { | |
| out.push(c_decimal as u8); | |
| return Ok(()); | |
| } | |
| if c_decimal < MAX_TWO_BYTE { | |
| let b_one: u8 = ((c_decimal >> 6) | TWO_BYTE_MASK) as u8; | |
| let b_two: u8 = ((c_decimal & MASK) | CONTINUATION_MASK) as u8; | |
| out.push(b_one); | |
| out.push(b_two); | |
| return Ok(()); | |
| } | |
| if c_decimal < MAX_THREE_BYTE { | |
| let b_one: u8 = ((c_decimal >> 12) | THREE_BYTE_MASK) as u8; | |
| let b_two: u8 = (((c_decimal >> 6) & MASK) | CONTINUATION_MASK) as u8; | |
| let b_three: u8 = ((c_decimal & MASK) | CONTINUATION_MASK) as u8; | |
| out.push(b_one); | |
| out.push(b_two); | |
| out.push(b_three); | |
| return Ok(()); | |
| } | |
| let b_one: u8 = ((c_decimal >> 18) | FOUR_BYTE_MASK) as u8; | |
| let b_two: u8 = (((c_decimal >> 12) & MASK) | CONTINUATION_MASK) as u8; | |
| let b_three: u8 = (((c_decimal >> 6) & MASK) | CONTINUATION_MASK) as u8; | |
| let b_four: u8 = ((c_decimal & MASK) | CONTINUATION_MASK) as u8; | |
| out.push(b_one); | |
| out.push(b_two); | |
| out.push(b_three); | |
| out.push(b_four); | |
| Ok(()) | |
| } | |
| fn main() { | |
| let sigma = 'Ʃ'; | |
| let star = '✪'; | |
| let rose_emoji = '🌹'; | |
| let mut res: Vec<u8> = Vec::new(); | |
| if let Err(e) = encode_utf8(rose_emoji, &mut res) { | |
| println!("{}", e); | |
| std::process::exit(1); | |
| } | |
| println!("{:?}", res); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment