Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Created July 4, 2023 19:00
Show Gist options
  • Save raphlinus/2fdb1ec32521fde2a73be043c7c7bf68 to your computer and use it in GitHub Desktop.
Save raphlinus/2fdb1ec32521fde2a73be043c7c7bf68 to your computer and use it in GitHub Desktop.
Property testing of email scanner
// SPDX-License-Identifier: MIT
use proptest::test_runner::TestRunner;
use proptest::strategy::{Strategy, ValueTree};
fn main() {
let email_re = "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\
(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$";
let re = regex::Regex::new(email_re).unwrap();
let mut runner = TestRunner::default();
for _ in 0..100_000_000 {
let str_val = "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]{0,10}@?[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]{0, 70}".new_tree(&mut runner).unwrap();
let s = str_val.current();
let re_match = re.is_match(&s);
let scan_match = scan_email(&format!("<{}>", s), 1).is_some();
assert!(re_match == scan_match, "{re_match} {s}");
}
}
/// Returns `1` if `data` begins with the byte `c`, and `0` otherwise
/// (including when `data` is empty).
pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
    match data.first() {
        Some(&head) if head == c => 1,
        _ => 0,
    }
}
/// Returns `true` when `c` is an ASCII digit (`0-9`) or an ASCII letter
/// (`a-z`, `A-Z`).
fn is_ascii_alphanumeric(c: u8) -> bool {
    // Resolves to the inherent `u8` method, not to this free function.
    c.is_ascii_alphanumeric()
}
/// Scans an email address followed by a closing `>` in `text`, starting at
/// byte offset `start_ix`.
///
/// The grammar is matched by hand rather than with a regex library:
///
/// * a non-empty local part made of ASCII alphanumerics and any of
///   ``.!#$%&'*+/=?^_`{|}~-``,
/// * the `@` separator,
/// * one or more labels separated by `.`, where each label is 1 to 63 bytes of
///   ASCII alphanumerics or `-` and neither starts nor ends with `-`,
/// * a closing `>` immediately after the last label.
///
/// Returns `Some((end_ix, email))`, where `email` is the matched address
/// (without the `>`) and `end_ix` is the byte offset in `text` just past the
/// closing `>`. Returns `None` if the input does not match, or if `start_ix`
/// lies beyond the end of `text`.
fn scan_email(text: &str, start_ix: usize) -> Option<(usize, String)> {
    // Checked slicing: an out-of-range `start_ix` means "no match", not a panic.
    let bytes = text.as_bytes().get(start_ix..)?;

    // Local part: the first byte outside the permitted set must be the `@`
    // separator, with at least one permitted byte before it. Running off the
    // end of the input without meeting `@` is not a match.
    let at_ix = bytes.iter().position(|&c| {
        !(is_ascii_alphanumeric(c)
            || matches!(
                c,
                b'.' | b'!'
                    | b'#'
                    | b'$'
                    | b'%'
                    | b'&'
                    | b'\''
                    | b'*'
                    | b'+'
                    | b'/'
                    | b'='
                    | b'?'
                    | b'^'
                    | b'_'
                    | b'`'
                    | b'{'
                    | b'|'
                    | b'}'
                    | b'~'
                    | b'-'
            ))
    })?;
    if at_ix == 0 || bytes[at_ix] != b'@' {
        return None;
    }

    // Domain: one or more `.`-separated labels.
    let mut i = at_ix + 1;
    loop {
        let label_start_ix = i;
        while i < bytes.len() && (is_ascii_alphanumeric(bytes[i]) || bytes[i] == b'-') {
            i += 1;
        }
        let label = &bytes[label_start_ix..i];
        if label.is_empty()
            || label.len() > 63
            || label.first() == Some(&b'-')
            || label.last() == Some(&b'-')
        {
            return None;
        }
        if scan_ch(&bytes[i..], b'.') == 0 {
            break;
        }
        // Step over the `.`; the next iteration requires another label.
        i += 1;
    }

    // The address must be terminated by `>` right after the last label.
    if scan_ch(&bytes[i..], b'>') == 0 {
        return None;
    }

    // Every matched byte is ASCII and the byte at `start_ix + i` is `>`, so
    // both slice ends fall on character boundaries.
    Some((start_ix + i + 1, text[start_ix..start_ix + i].into()))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment