Skip to content

Instantly share code, notes, and snippets.

@Ranlvor
Last active September 23, 2023 19:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ranlvor/cfaa792d4f2411c9f242874428c2003f to your computer and use it in GitHub Desktop.
Save Ranlvor/cfaa792d4f2411c9f242874428c2003f to your computer and use it in GitHub Desktop.
# Cargo manifest for the regex enumeration demo whose Rust source follows below.
[package]
name = "regex-decompile"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Parses a pattern into the `Hir` tree that the program walks.
regex-syntax = "0.7.5"
# Supplies the `chars!` macro used to iterate over the characters of a class range.
unic-char-range = "0.9.0"
# Supplies `multi_cartesian_product`, used to combine the parts of a concatenation.
itertools = "0.11.0"
use regex_syntax::{hir::Hir, parse, hir::HirKind, hir::Class::*};
#[macro_use] extern crate unic_char_range;
use itertools::Itertools;
/// Demo driver: prints each sample pattern followed by the result of
/// enumerating the strings it can match.
///
/// Panics if one of the hard-coded patterns fails to parse.
fn main() {
    // Sample patterns covering literals, alternation, groups, classes,
    // repetitions (bounded and unbounded), anchors and a Unicode property.
    const PATTERNS: &[&str] = &[
        "abc",
        "ab|cd",
        "(ab|cd)",
        "[acf]",
        "[abc]",
        "a+",
        "a*",
        "a{5,9}",
        "a{0,0}",
        "(test)(t){0,0}",
        "t?",
        "^nope$",
        "(ab|cd)[efg] [hi] a{2,3} t?",
        "\\p{Emoji}",
    ];

    println!("Hello, world!");
    for pattern in PATTERNS.iter().copied() {
        println!("{}", pattern);
        let parsed = parse(pattern).unwrap();
        let combinations = get_combinations_for_hir(&parsed);
        println!("{:?}\n", combinations);
    }
}
/// Enumerates every string that the expression described by `hir` can match.
///
/// The result is built recursively:
/// * an empty expression yields the single empty string,
/// * a literal yields itself,
/// * a Unicode class yields one string per character in the class,
/// * a capture group yields whatever its sub-expression yields,
/// * an alternation yields the results of its branches, in order,
/// * a concatenation yields the cartesian product of its parts,
/// * a bounded repetition yields, for every allowed count, all sequences of
///   that many (independently chosen) matches of the sub-expression.
///
/// # Errors
///
/// Returns a descriptive message when the expression contains something that
/// is not supported: a literal that is not valid UTF-8, a byte class, a
/// repetition without an upper bound (`+`, `*`, `{n,}`), or a look-around
/// assertion such as `^` / `$`.
fn get_combinations_for_hir(hir: &Hir) -> Result<Vec<String>, String> {
    match hir.kind() {
        HirKind::Empty => Ok(vec![String::new()]),
        HirKind::Literal(v) => match String::from_utf8(v.0.to_vec()) {
            Ok(s) => Ok(vec![s]),
            Err(_) => Err(format!("could not convert {:?} to string", v)),
        },
        HirKind::Class(Unicode(class)) => {
            let mut result = Vec::new();
            for range in class.iter() {
                let s = range.start();
                let e = range.end();
                for character in chars!(s..=e) {
                    result.push(character.to_string());
                }
            }
            Ok(result)
        }
        HirKind::Class(Bytes(_)) => Err("Class(Bytes) not implemented".to_string()),
        HirKind::Look(_) => Err(format!("Not implemented: {:?}", hir)),
        HirKind::Repetition(v) => match v.max {
            Some(max) => {
                let children = get_combinations_for_hir(&v.sub)?;
                Ok(repeat_combinations(&children, v.min, max))
            }
            // An unbounded repetition matches infinitely many strings.
            None => Err(format!("Not implemented for unbounded max: {:?}", hir)),
        },
        HirKind::Capture(v) => get_combinations_for_hir(&v.sub),
        HirKind::Concat(v) => {
            // Stop at the first part that cannot be enumerated.
            let sublists = v
                .iter()
                .map(get_combinations_for_hir)
                .collect::<Result<Vec<_>, _>>()?;
            Ok(sublists
                .iter()
                .multi_cartesian_product()
                .map(|parts| parts.into_iter().map(String::as_str).collect::<String>())
                .collect())
        }
        HirKind::Alternation(v) => {
            let mut result = Vec::new();
            for branch in v {
                result.extend(get_combinations_for_hir(branch)?);
            }
            Ok(result)
        }
    }
}

/// Builds every string made of `min..=max` consecutive matches, where each
/// match is chosen independently from `children`.
///
/// Zero repetitions contribute exactly one empty string, regardless of how
/// many entries `children` has. Results are ordered by repetition count and,
/// within one count, with the earliest position varying slowest.
fn repeat_combinations(children: &[String], min: u32, max: u32) -> Vec<String> {
    let mut result = Vec::new();
    // Invariant: at the top of each iteration `current` holds all strings
    // formed by exactly `count` matches; zero matches is the empty string.
    let mut current = vec![String::new()];
    for count in 0..=max {
        if count >= min {
            result.extend(current.iter().cloned());
        }
        if count < max {
            // Extend every sequence by one more match of the sub-expression.
            current = current
                .iter()
                .flat_map(|prefix| {
                    children
                        .iter()
                        .map(move |child| [prefix.as_str(), child.as_str()].concat())
                })
                .collect();
        }
    }
    result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment