raphlinus/main.rs

## main.rs
// SPDX-License-Identifier: MIT
use proptest::test_runner::TestRunner;
use proptest::strategy::{Strategy, ValueTree};

fn main() {
    let email_re = "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\
    (?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$";
    let re = regex::Regex::new(email_re).unwrap();
    let mut runner = TestRunner::default();
    for _ in 0..100_000_000 {
        let str_val = "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]{0,10}@?[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]{0, 70}".new_tree(&mut runner).unwrap();
        let s = str_val.current();
        let re_match = re.is_match(&s);
        let scan_match = scan_email(&format!("<{}>", s), 1).is_some();
        assert!(re_match == scan_match, "{re_match} {s}");
    }
}

/// Returns 1 if `data` begins with the byte `c`, and 0 otherwise
/// (including when `data` is empty).
pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
    let starts_with_c = data.first() == Some(&c);
    usize::from(starts_with_c)
}

/// Returns `true` when `c` is an ASCII digit, lowercase letter or uppercase
/// letter.
fn is_ascii_alphanumeric(c: u8) -> bool {
    // Resolves to the inherent `u8` method, which accepts exactly
    // `0-9`, `a-z` and `A-Z`.
    c.is_ascii_alphanumeric()
}

/// Scans an email address followed by `>` in `text`, starting at byte offset
/// `start_ix` (the caller in this file passes the offset just past an
/// opening `<`).
///
/// The local part must be a non-empty run of ASCII alphanumerics and the
/// punctuation ``.!#$%&'*+/=?^_`{|}~-``, terminated by `@`. The domain is one
/// or more `.`-separated labels; each label is 1 to 63 bytes of ASCII
/// alphanumerics and `-`, and may neither start nor end with `-`. The byte
/// right after the domain must be `>`.
///
/// Returns the byte offset just past the closing `>` together with the
/// address text, or `None` when no address is found. An out-of-range
/// `start_ix` also yields `None` instead of panicking.
fn scan_email(text: &str, start_ix: usize) -> Option<(usize, String)> {
    // using a regex library would be convenient, but doing it by hand is not too bad
    // Checked slicing: `start_ix > text.len()` means "no match", not a panic.
    let bytes = text.as_bytes().get(start_ix..)?;
    let mut i = 0;

    // Local part: consume allowed bytes up to and including the `@`.
    while i < bytes.len() {
        let c = bytes[i];
        i += 1;
        match c {
            c if is_ascii_alphanumeric(c) => (),
            b'.' | b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'/' | b'=' | b'?'
            | b'^' | b'_' | b'`' | b'{' | b'|' | b'}' | b'~' | b'-' => (),
            // `i` was already advanced, so `i > 1` means at least one byte
            // precedes the `@`.
            b'@' if i > 1 => break,
            _ => return None,
        }
    }
    // If the input ran out before an `@` was seen, `i == bytes.len()` here and
    // the empty-label check below rejects it.

    // Domain: one or more labels separated by single dots.
    loop {
        let label_start_ix = i;
        let mut fresh_label = true;

        while i < bytes.len() {
            match bytes[i] {
                c if is_ascii_alphanumeric(c) => (),
                // A label may not begin with a hyphen.
                b'-' if fresh_label => {
                    return None;
                }
                b'-' => (),
                _ => break,
            }
            fresh_label = false;
            i += 1;
        }

        // Reject empty labels, labels longer than 63 bytes, and labels that
        // end with a hyphen. `i > label_start_ix` is established before
        // `bytes[i - 1]` is evaluated, so the index is in range.
        if i == label_start_ix || i - label_start_ix > 63 || bytes[i - 1] == b'-' {
            return None;
        }

        if scan_ch(&bytes[i..], b'.') == 0 {
            break;
        }
        i += 1;
    }

    // The address must be closed by `>` immediately.
    if scan_ch(&bytes[i..], b'>') == 0 {
        return None;
    }

    // Every consumed byte is ASCII, so both slice ends are char boundaries.
    Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into()))
}
	// SPDX-License-Identifier: MIT
	use proptest::test_runner::TestRunner;
	use proptest::strategy::{Strategy, ValueTree};

	fn main() {
	let email_re = "^[a-zA-Z0-9.!#$%&'*+/=?^_`{\|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\
	(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$";
	let re = regex::Regex::new(email_re).unwrap();
	let mut runner = TestRunner::default();
	for _ in 0..100_000_000 {
	let str_val = "[a-zA-Z0-9.!#$%&'+/=?^_`{\|}~-]{0,10}@?[a-zA-Z0-9.!#$%&'+/=?^_`{\|}~-]{0, 70}".new_tree(&mut runner).unwrap();
	let s = str_val.current();
	let re_match = re.is_match(&s);
	let scan_match = scan_email(&format!("<{}>", s), 1).is_some();
	assert!(re_match == scan_match, "{re_match} {s}");
	}
	}

	/// Reports whether the first byte of `data` is `c`: 1 when it is, 0 when it
	/// is not or when `data` is empty.
	pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
	    match data.first() {
	        Some(&first) if first == c => 1,
	        _ => 0,
	    }
	}

	/// Returns `true` when `c` is an ASCII digit, lowercase letter or uppercase
	/// letter.
	fn is_ascii_alphanumeric(c: u8) -> bool {
	    // Pattern alternatives use a plain `|`; the escaped `\|` form is not
	    // valid Rust.
	    matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
	}

	fn scan_email(text: &str, start_ix: usize) -> Option<(usize, String)> {
	// using a regex library would be convenient, but doing it by hand is not too bad
	let bytes = &text.as_bytes()[start_ix..];
	let mut i = 0;

	while i < bytes.len() {
	let c = bytes[i];
	i += 1;
	match c {
	c if is_ascii_alphanumeric(c) => (),
	b'.' \| b'!' \| b'#' \| b'$' \| b'%' \| b'&' \| b'\'' \| b'*' \| b'+' \| b'/' \| b'=' \| b'?'
	\| b'^' \| b'_' \| b'`' \| b'{' \| b'\|' \| b'}' \| b'~' \| b'-' => (),
	b'@' if i > 1 => break,
	_ => return None,
	}
	}

	loop {
	let label_start_ix = i;
	let mut fresh_label = true;

	while i < bytes.len() {
	match bytes[i] {
	c if is_ascii_alphanumeric(c) => (),
	b'-' if fresh_label => {
	return None;
	}
	b'-' => (),
	_ => break,
	}
	fresh_label = false;
	i += 1;
	}

	if i == label_start_ix \|\| i - label_start_ix > 63 \|\| bytes[i - 1] == b'-' {
	return None;
	}

	if scan_ch(&bytes[i..], b'.') == 0 {
	break;
	}
	i += 1;
	}

	if scan_ch(&bytes[i..], b'>') == 0 {
	return None;
	}

	Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into()))
	}