Skip to content

Instantly share code, notes, and snippets.

@Yaulendil
Created December 22, 2020 18:42
Show Gist options
  • Save Yaulendil/7ec3d5bc961d844c945202080b98f4c1 to your computer and use it in GitHub Desktop.
Save Yaulendil/7ec3d5bc961d844c945202080b98f4c1 to your computer and use it in GitHub Desktop.
use std::{
marker::PhantomData,
slice::{from_raw_parts, from_raw_parts_mut},
str::{from_utf8_unchecked, from_utf8_unchecked_mut},
};
/// Report how many bytes a UTF-8 encoded character occupies, judging only by
/// the first byte of its sequence.
///
/// The length is read from the run of one bits at the top of the byte: no
/// leading ones means a single-byte character, while two, three or four
/// leading ones mean a sequence of that many bytes.
///
/// # Panics
///
/// Panics if `start_byte` has exactly one leading one bit, or five or more.
#[inline]
fn utf8_char_len(start_byte: u8) -> usize {
    // Inverting the byte turns its leading ones into leading zeros, which the
    // standard library can count directly.
    let leading_ones = (!start_byte).leading_zeros();
    match leading_ones {
        // 0xxx xxxx
        0 => 1,
        // 110x xxxx / 1110 xxxx / 1111 0xxx: the count is the sequence length.
        2..=4 => leading_ones as usize,
        _ => panic!(
            "Invalid UTF-8 starting byte: {b:#04X} [{b:#010b}]",
            b = start_byte,
        ),
    }
}
/// Iterator that walks a mutable string slice one character at a time, handing
/// out each character as its own mutable string slice.
///
/// Every item spans exactly the UTF-8 bytes of one `char` of the source string.
pub struct MutChars<'i> {
    /// Address of the first byte that has not been yielded yet.
    data: *mut u8,
    /// Address one past the final byte of the source string.
    end: *mut u8,
    /// Zero-sized marker carrying the lifetime of the mutable borrow.
    _p: PhantomData<&'i mut str>,
}

impl<'i> MutChars<'i> {
    /// Build an iterator covering all of `string`.
    ///
    /// The iterator holds the exclusive borrow of `string` for `'i`.
    pub fn from(string: &'i mut str) -> Self {
        let byte_count = string.len();
        let first = string.as_mut_ptr();
        Self {
            data: first,
            // SAFETY: `first` points at the start of `string`, which is
            // `byte_count` bytes long, so the result is at most one past the
            // end of that same string.
            end: unsafe { first.add(byte_count) },
            _p: PhantomData,
        }
    }
}
// impl<'i, T> From<T> for MutChars<'i> where
// T: AsMut<str> + 'i,
// {
// fn from(mut asmut: T) -> Self {
// let string: &mut str = asmut.as_mut();
//
// let data: *mut u8 = string.as_mut_ptr();
// let end: *mut u8 = unsafe { data.add(string.len()) };
//
// Self { data, end, _p: PhantomData }
// }
// }
impl<'i> Iterator for MutChars<'i> {
    type Item = &'i mut str;

    /// Yield the next character as a mutable string slice, or `None` once the
    /// cursor has reached the end pointer.
    ///
    /// Panics (via `utf8_char_len`) if the byte under the cursor is rejected
    /// as a sequence start.
    fn next(&mut self) -> Option<Self::Item> {
        if self.data < self.end {
            let start = self.data;
            // SAFETY: the cursor is strictly before `end`, so it is readable.
            // The slice length comes from the leading byte, which is in bounds
            // as long as the underlying bytes are still valid UTF-8 (they came
            // from a `&mut str`). Each call advances past the bytes it hands
            // out, so yielded slices never overlap.
            unsafe {
                let width = utf8_char_len(*start);
                self.data = start.add(width);
                Some(from_utf8_unchecked_mut(from_raw_parts_mut(start, width)))
            }
        } else {
            None
        }
    }
}
/// An iterator of immutable string slices over an immutable string slice.
///
/// Each slice returned corresponds to one `char` in the original string, in
/// UTF-8 encoding.
pub struct StrChars<'i> {
    /// Address of the first byte that has not been yielded yet.
    data: *const u8,
    /// Address one past the final byte of the source string.
    end: *const u8,
    /// Zero-sized marker tying the raw pointers to the borrow of the source.
    _p: PhantomData<&'i str>,
}

/// Build a `StrChars` from a borrow of anything that can be viewed as a `str`
/// (`&str`, `&String`, ...).
///
/// The source is taken by reference on purpose: the iterator stores raw
/// pointers into the string data, so that data must outlive the iterator.
/// Accepting an owned value here would drop it when `from` returns and leave
/// the iterator pointing at freed memory. Borrowing for `'i` lets the compiler
/// enforce that the source stays alive.
impl<'i, T> From<&'i T> for StrChars<'i>
where
    T: AsRef<str> + ?Sized,
{
    fn from(asref: &'i T) -> Self {
        // Calling through `T` keeps the resulting `&str` tied to `'i`.
        let string: &'i str = T::as_ref(asref);
        let data: *const u8 = string.as_ptr();
        // SAFETY: `data` points at the start of `string`, which is
        // `string.len()` bytes long, so the result is at most one past the end
        // of that same string.
        let end: *const u8 = unsafe { data.add(string.len()) };
        Self { data, end, _p: PhantomData }
    }
}
impl<'i> Iterator for StrChars<'i> {
    type Item = &'i str;

    /// Yield the next character as a string slice, or `None` once the cursor
    /// has reached the end pointer.
    ///
    /// Panics (via `utf8_char_len`) if the byte under the cursor is rejected
    /// as a sequence start.
    fn next(&mut self) -> Option<Self::Item> {
        if self.data < self.end {
            let start = self.data;
            // SAFETY: the cursor is strictly before `end`, so it is readable
            // provided the string this iterator was built from is still alive.
            // The slice length comes from the leading byte, which is in bounds
            // for valid UTF-8 data.
            unsafe {
                let width = utf8_char_len(*start);
                self.data = start.add(width);
                Some(from_utf8_unchecked(from_raw_parts(start, width)))
            }
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Edits a string in place through `MutChars` in three passes, checking
    /// the resulting text after each one.
    #[test]
    fn test_mut_chars() {
        let mut s: String = String::from("äßdf 🐱 qwærþ");
        dbg!(&s);

        // Pass 1: replace every four-byte character with a different
        // four-byte character by writing into its bytes.
        MutChars::from(s.as_mut_str())
            .filter(|c| c.len() == 4)
            .for_each(|c| unsafe {
                '🗡'.encode_utf8(c.as_bytes_mut());
            });
        dbg!(&s);
        assert_eq!("äßdf 🗡 qwærþ", s.as_str());

        // Pass 2: bump the value of every single-byte character by one.
        MutChars::from(&mut s)
            .filter(|c| c.len() == 1)
            .for_each(|c| unsafe {
                c.as_bytes_mut()[0] += 1;
            });
        dbg!(&s);
        assert_eq!("äßeg!🗡!rxæsþ", s.as_str());

        // Pass 3: overwrite every two-byte character with two single-byte
        // characters occupying the same two bytes.
        MutChars::from(&mut s[..])
            .filter(|c| c.len() == 2)
            .for_each(|c| unsafe {
                c.as_bytes_mut().copy_from_slice(b"ZW");
            });
        dbg!(&s);
        assert_eq!("ZWZWeg!🗡!rxZWsZW", s.as_str());
    }

    /// Checks that `StrChars` yields one slice per `char`, in the same order
    /// as `str::chars`, and the same number of them.
    #[test]
    fn test_str_chars() {
        let string: String = String::from("äßdf 🐱 qwærþ");
        let mut compared: usize = 0;

        let pairs = string.chars().zip(StrChars::from(&string[..]));
        for (c, s) in pairs {
            assert_eq!(
                c,
                s.chars().next().expect("StrChars exhausted early"),
                "char {:?} is not equal to string slice {:?}.", c, s,
            );
            compared += 1;
        }

        // `zip` stops at the shorter side, so also confirm nothing was missed.
        assert_eq!(compared, string.chars().count(), "");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment