Last active
September 23, 2023 19:03
-
-
Save Ranlvor/cfaa792d4f2411c9f242874428c2003f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "regex-decompile"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
regex-syntax = "0.7.5"
unic-char-range = "0.9.0"
itertools = "0.11.0"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use regex_syntax::{hir::Hir, parse, hir::HirKind, hir::Class::*}; | |
#[macro_use] extern crate unic_char_range; | |
use itertools::Itertools; | |
fn main() { | |
println!("Hello, world!"); | |
let regex = vec!["abc", "ab|cd", "(ab|cd)", "[acf]", "[abc]", "a+", "a*", "a{5,9}", "a{0,0}", "(test)(t){0,0}", "t?", "^nope$", "(ab|cd)[efg] [hi] a{2,3} t?", "\\p{Emoji}"]; | |
for r in regex { | |
println!("{}",r); | |
let hir = parse(r).unwrap(); | |
println!("{:?}\n", get_combinations_for_hir(&hir)); | |
} | |
} | |
fn get_combinations_for_hir(hir: &Hir) -> Result<Vec<String>, String> { | |
match hir.kind() { | |
HirKind::Concat(v) => { | |
let mut sublists = Vec::new(); | |
for h in v.iter() { | |
sublists.push(get_combinations_for_hir(h)?); | |
} | |
let cartesian = sublists.iter().multi_cartesian_product(); | |
/*for result in cartesian { | |
println!(" {:?}", result); | |
let s: String = result.into_iter().map(|x| x.to_string()).collect(); | |
println!(" {:?}", s); | |
}*/ | |
Ok(cartesian.map(|y| y.into_iter().map(|x| x.to_string()).collect()).collect()) | |
} | |
HirKind::Capture(v) => { | |
get_combinations_for_hir(&v.sub) | |
} | |
HirKind::Alternation(v) => { | |
let mut result = Vec::new(); | |
for h in v { | |
result.append(&mut get_combinations_for_hir(h)?); | |
} | |
Ok(result) | |
} | |
HirKind::Literal(v) => { | |
let s = String::from_utf8(v.0.to_vec()); | |
match s { | |
Ok(s) => { | |
let mut result = Vec::new(); | |
result.push(s); | |
Ok(result) | |
}, | |
Err(_) => Err(format!("could not convert {:?} to string", v)) | |
} | |
} | |
HirKind::Class(v) => { | |
match v { | |
Unicode(v) => { | |
let mut result = Vec::new(); | |
for c in v.iter() { | |
let s = c.start(); let e = c.end(); | |
for character in chars!(s..=e) { | |
result.push(character.to_string()); | |
} | |
} | |
Ok(result) | |
}, | |
Bytes(_v) => { Err("Class(Bytes) not implemented".to_string()) }, | |
} | |
} | |
HirKind::Repetition(v) => { | |
if let Some(max) = v.max { | |
let mut result = Vec::new(); | |
let children = get_combinations_for_hir(&v.sub)?; | |
for i in v.min ..= max { | |
if let Ok(i) = i.try_into() { | |
for c in &children { | |
result.push(c.repeat(i)); | |
} | |
} else { | |
return Err(format!("could not convert {} to usize (that should be impossible)", i)); | |
} | |
} | |
Ok(result) | |
} else { | |
return Err(format!("Not implemented for unbounded max: {:?}", hir)); | |
} | |
} | |
HirKind::Empty => { | |
let mut result = Vec::new(); | |
result.push("".to_string()); | |
return Ok(result) | |
} | |
HirKind::Look(_) => Err(format!("Not implemented: {:?}", hir)), | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment