Skip to content

Instantly share code, notes, and snippets.

@icub3d
Created January 18, 2024 03:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save icub3d/34dd7a95ab84e8acf6df2fc82b09b988 to your computer and use it in GitHub Desktop.
Save icub3d/34dd7a95ab84e8acf6df2fc82b09b988 to your computer and use it in GitHub Desktop.
Byes vs Chars in Rust
// A helper function to print some details about a string.
fn string_details(string: &str) {
println!("string: {}", string);
println!("char details (char: index byte_index bytes)");
// The index and byte_index may be different for non-ASCII characters.
for (index, (byte_index, c)) in string.char_indices().enumerate() {
println!(
" {:?}: {:02} {:02} {:?}",
c,
index,
byte_index,
c.to_string().as_bytes()
);
}
println!();
}
fn main() {
// Print some information about an ASCII string.
let english = "Hello, world";
string_details(english);
// Print some information about a UTF-8 string.
let korean = "안녕세상";
string_details(korean);
let emoji = "👋🌎";
string_details(emoji);
// Let's see how long it takes to count vowels in a non-trivially sized string using the
// chars() iterator.
let data = std::fs::read_to_string("/usr/share/dict/cracklib-small").unwrap();
let now = std::time::Instant::now();
let mut vowels = 0;
for c in data.chars() {
if ['a', 'e', 'i', 'o', 'u'].contains(&c) {
vowels += 1;
}
}
println!("{} vowels in {:?}", vowels, now.elapsed());
// Now let's see how long it takes to count vowels in a non-trivially sized string using the
// bytes() iterator.
let data = std::fs::read_to_string("/usr/share/dict/cracklib-small").unwrap();
let now = std::time::Instant::now();
let mut vowels = 0;
for b in data.bytes() {
if [b'a', b'e', b'i', b'o', b'u'].contains(&b) {
vowels += 1;
}
}
println!("{} vowels in {:?}", vowels, now.elapsed());
}
// https://leetcode.com/problems/minimum-window-substring/description/
pub fn min_window_substring_chars(s: &str, t: &str) -> String {
use std::collections::HashMap;
// *state*
let mut counts: HashMap<char, isize> = t.chars().fold(HashMap::new(), |mut counts, c| {
*counts.entry(c).or_default() += 1;
counts
});
let (mut min_left, mut min_right) = (0, 0);
// *iterate*
//
// Track our left position and the character at that position.
let mut left_iter = s.chars().enumerate();
let (mut left, mut left_char) = left_iter.next().unwrap();
for (right, b) in s.chars().enumerate() {
// *update state*
if let Some(count) = counts.get_mut(&b) {
*count -= 1
}
while counts.values().all(|&count| count <= 0) {
if right - left + 1 < (min_right - min_left + 1) || (min_left == 0 && min_right == 0) {
(min_left, min_right) = (left, right);
}
if let Some(count) = counts.get_mut(&left_char) {
*count += 1;
}
// Move left. We need to make sure we don't move past the end of the string. If we do,
// we know we are done.
if let Some((l, c)) = left_iter.next() {
left = l;
left_char = c;
} else {
break;
}
}
}
s.chars()
.skip(min_left)
.take(min_right - min_left + 1)
.collect()
}
// https://leetcode.com/problems/minimum-window-substring/description/
// Let's modify our min_window_substring function to be more performant assuming we only get ASCII.
// If we do need to be mindful of UTF-8, we can use the chars() iterator instead of the bytes().
pub fn min_window_substring(s: &str, t: &str) -> String {
use std::collections::HashMap;
// *state*
let mut counts: HashMap<u8, isize> = t.bytes().fold(HashMap::new(), |mut counts, b| {
*counts.entry(b).or_default() += 1;
counts
});
let mut min_window = "";
// *iterate*
//
// Track our left position and the character at that position.
let mut left_iter = s.bytes().enumerate();
let (mut left, mut left_char) = left_iter.next().unwrap();
for (right, b) in s.bytes().enumerate() {
// *update state*
if let Some(count) = counts.get_mut(&b) {
*count -= 1
}
while counts.values().all(|&count| count <= 0) {
if right - left + 1 < min_window.len() || min_window.is_empty() {
min_window = &s[left..=right];
}
if let Some(count) = counts.get_mut(&left_char) {
*count += 1;
}
// Move left. We need to make sure we don't move past the end of the string. If we do,
// we know we are done.
if let Some((l, c)) = left_iter.next() {
left = l;
left_char = c;
} else {
break;
}
}
}
min_window.to_string()
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_min_window_substring() {
let s = "ADOBECODEBANC";
let t = "ABC";
assert_eq!(min_window_substring(s, t), "BANC");
let s = "a";
let t = "a";
assert_eq!(min_window_substring(s, t), "a");
let s = "a";
let t = "aa";
assert_eq!(min_window_substring(s, t), "");
let s = "a";
let t = "b";
assert_eq!(min_window_substring(s, t), "");
let s = "aa";
let t = "aa";
assert_eq!(min_window_substring(s, t), "aa");
let s = "bbaa";
let t = "aba";
assert_eq!(min_window_substring(s, t), "baa");
let s = "bbaa";
let t = "abb";
assert_eq!(min_window_substring(s, t), "bba");
let s = "bbaa";
let t = "abaaa";
assert_eq!(min_window_substring(s, t), "");
}
#[test]
fn test_min_window_substring_chars() {
let s = "ADOBECODEBANC";
let t = "ABC";
assert_eq!(min_window_substring_chars(s, t), "BANC");
let emojis = "👋🌎😁😀😃😆😄😁😆";
let t = "😁😆";
assert_eq!(min_window_substring_chars(emojis, t), "😁😆");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment