Created
June 9, 2022 15:18
-
-
Save ronin/b176ec6a26c71f3a678374e642c42a75 to your computer and use it in GitHub Desktop.
Base64 encoder written in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// The 64-character Base64 alphabet, indexed by 6-bit value:
/// `A`-`Z` (0-25), `a`-`z` (26-51), `0`-`9` (52-61), then `+` (62) and `/` (63).
const CHARS: [char; 64] = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
    'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
    'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4',
    '5', '6', '7', '8', '9', '+', '/',
];
fn encode_bytes(chunk: &[u8]) -> [char; 4] { | |
let combined: u32 = ((chunk[0] as u32) << 16) + ((chunk[1] as u32) << 8) + (chunk[2] as u32); | |
let part_1 = ((combined & 0b111111000000000000000000) >> 18) as usize; | |
let part_2 = ((combined & 0b000000111111000000000000) >> 12) as usize; | |
let part_3 = ((combined & 0b000000000000111111000000) >> 6) as usize; | |
let part_4 = (combined & 0b000000000000000000111111) as usize; | |
[CHARS[part_1], CHARS[part_2], CHARS[part_3], CHARS[part_4]] | |
} | |
/// Reads a single line from standard input and returns it with leading and trailing
/// whitespace (including the line terminator) removed.
///
/// # Panics
///
/// Panics if reading from standard input fails.
fn get_input() -> String {
    let mut input = String::new();
    std::io::stdin()
        .read_line(&mut input)
        .expect("failed to read a line from standard input");
    input.trim().to_string()
}
/// Returns `(len, padded_len)` for `input`, where `len` is the input length in bytes and
/// `padded_len` is `len` rounded up to the next multiple of three (equal to `len` when it
/// already is one).
fn get_size(input: &[u8]) -> (usize, usize) {
    let size = input.len();
    let remainder = size % 3;
    // The outer `% 3` maps a remainder of 0 to zero padding rather than three bytes.
    let padding = (3 - remainder) % 3;
    (size, size + padding)
}
/// Returns a copy of `input` extended with zero bytes until it is `size` bytes long.
///
/// If `size` is not larger than `input.len()`, the copy is returned unchanged; the input
/// is never shortened.
fn pad_bytes(input: &[u8], size: usize) -> Vec<u8> {
    // Never shrink: `resize` would truncate if handed a length below the input's.
    let target = size.max(input.len());
    // Allocate once at the final length so appending the padding cannot reallocate.
    let mut padded_input = Vec::with_capacity(target);
    padded_input.extend_from_slice(input);
    padded_input.resize(target, 0);
    padded_input
}
fn main() { | |
let input = get_input(); | |
let input = input.as_bytes(); | |
let (original_size, padded_size) = get_size(input); | |
let input = pad_bytes(input, padded_size); | |
let mut output = String::new(); | |
for chunk in input.chunks(3) { | |
for c in encode_bytes(&chunk) { | |
output.push(c); | |
} | |
} | |
for _ in original_size..padded_size { | |
output.pop(); | |
} | |
for _ in original_size..padded_size { | |
output.push('='); | |
} | |
println!("{}", output); | |
} |
The last two loops can be replaced with these lines:
output.truncate(output.len() - (padded_size - original_size));
output.extend((original_size..padded_size).map(|_| '='));
/// Returns a copy of `input` followed by as many zero bytes as are needed to reach
/// `size` bytes; when `size` does not exceed `input.len()` the copy is returned as is.
fn pad_bytes(input: &[u8], size: usize) -> Vec<u8> {
    // Saturating subtraction yields zero filler bytes when `size` is not larger.
    let filler = std::iter::repeat(0u8).take(size.saturating_sub(input.len()));
    input.iter().copied().chain(filler).collect()
}
Besides being more declarative, this version also avoids reallocating memory when a `push` call overflows the initially allocated capacity, as can happen in the original code. Rust iterators and iterator combinators are often smart enough to figure out the required capacity of the accumulator when calling `collect`.
Random thought: it would probably be more efficient to do all the byte manipulation on the vector of `u8`s and convert that vector to a `String` only at the very end. That way you could simply replace the last few elements of the vector with `=` instead of doing `pop`/`push` or `truncate`/`extend`.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Regarding line 21, `std::io::stdin().read_line(&mut input).unwrap();`: it's probably better to return `Result<String, _>` instead of `String` and do the unwraps only in the top-level `main` function.