Skip to content

Instantly share code, notes, and snippets.

@rust-play
Created November 17, 2022 00:37
Show Gist options
  • Save rust-play/5e53ccff274bf7f66e7e8aec5375eda3 to your computer and use it in GitHub Desktop.
Save rust-play/5e53ccff274bf7f66e7e8aec5375eda3 to your computer and use it in GitHub Desktop.
Code shared from the Rust Playground
#![allow(unused)]
use lazy_static::lazy_static;
use regex::Regex;
use std::borrow::Cow;
/// Describes one markdown construct: how to find it, which tag pair it is
/// rewritten to, and which callbacks fire when those tags are later scanned.
#[derive(Debug)]
struct MdReplace {
    /// Pattern that locates the construct. `perform_replace` substitutes the
    /// named capture group `content`, so every pattern is expected to define it.
    re: Regex,
    /// Tag emitted in place of the opening markdown delimiter.
    open_tag: &'static str,
    /// Tag emitted in place of the closing markdown delimiter.
    close_tag: &'static str,
    /// Invoked by `perform_tagged_callbacks` when `open_tag` is found.
    start_fn: fn(),
    /// Invoked by `perform_tagged_callbacks` when `close_tag` is found.
    end_fn: fn(),
}
impl MdReplace {
    /// Rewrites every match of `self.re` in `s` as
    /// `open_tag` + the text captured by the `content` group + `close_tag`.
    ///
    /// The result is whatever `Regex::replace_all` returns for `s`, so the
    /// returned `Cow` is tied to the lifetime of the input string.
    fn perform_replace<'t>(&self, s: &'t str) -> Cow<'t, str> {
        // Use the braced form `${content}` for the group reference. With a bare
        // `$content`, the regex crate takes the longest run of `[_0-9A-Za-z]`
        // as the group name, so a `close_tag` that starts with a word character
        // would be glued onto the name and the substitution would silently
        // become empty. `{{` / `}}` are `format!` escapes for literal braces.
        let template = format!("{}${{content}}{}", self.open_tag, self.close_tag);
        self.re.replace_all(s, template)
    }
}
lazy_static! {
    // Fenced code block: everything between two ``` fences. `(?s)` lets the
    // lazy `.*?` run across newlines.
    static ref CODE_BLOCK: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)(?P<open>```)(?P<content>(?s).*?)(?P<close>```)"#).unwrap(),
        open_tag: "<rustc_codeblock>",
        close_tag: "</rustc_codeblock>",
        start_fn: || println!("start_fn called for CODE_BLOCK"),
        end_fn: || println!("end_fn called for CODE_BLOCK"),
    };

    // Inline code: everything between two single backticks.
    static ref CODE_INLINE: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)(?P<open>`)(?P<content>(?s).*?)(?P<close>`)"#).unwrap(),
        open_tag: "<rustc_codeinline>",
        close_tag: "</rustc_codeinline>",
        start_fn: || println!("start_fn called for CODE_INLINE"),
        end_fn: || println!("end_fn called for CODE_INLINE"),
    };

    // Headings: one to four `#` at the start of a line, then whitespace; the
    // remainder of the line is the heading text.
    static ref HEADING1: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)^(?P<open>#){1}\s+(?P<content>.*)(?P<close>)"#).unwrap(),
        open_tag: "<rustc_h1>",
        close_tag: "</rustc_h1>",
        start_fn: || println!("start_fn called for HEADING1"),
        end_fn: || println!("end_fn called for HEADING1"),
    };

    static ref HEADING2: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)^(?P<open>#){2}\s+(?P<content>.*)(?P<close>)"#).unwrap(),
        open_tag: "<rustc_h2>",
        close_tag: "</rustc_h2>",
        start_fn: || println!("start_fn called for HEADING2"),
        end_fn: || println!("end_fn called for HEADING2"),
    };

    static ref HEADING3: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)^(?P<open>#){3}\s+(?P<content>.*)(?P<close>)"#).unwrap(),
        open_tag: "<rustc_h3>",
        close_tag: "</rustc_h3>",
        start_fn: || println!("start_fn called for HEADING3"),
        end_fn: || println!("end_fn called for HEADING3"),
    };

    static ref HEADING4: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)^(?P<open>#){4}\s+(?P<content>.*)(?P<close>)"#).unwrap(),
        open_tag: "<rustc_h4>",
        close_tag: "</rustc_h4>",
        start_fn: || println!("start_fn called for HEADING4"),
        end_fn: || println!("end_fn called for HEADING4"),
    };

    // Bold: at least one character between two `**` markers, newlines allowed.
    static ref BOLD: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)(?P<open>\*\*)(?P<content>(?s).+?)(?P<close>\*\*)"#).unwrap(),
        open_tag: "<rustc_bold>",
        close_tag: "</rustc_bold>",
        start_fn: || println!("start_fn called for BOLD"),
        end_fn: || println!("end_fn called for BOLD"),
    };

    // Italic: at least one character between two `_` markers, newlines allowed.
    // `[$ ]` is a character class, so the opening `_` must be preceded by a
    // literal `$` or a space. That character is part of the match and is
    // therefore dropped by the replacement (see `MD_OUTPUT`).
    // NOTE(review): presumably "start of line or space" was intended — confirm.
    static ref ITALIC: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)[$ ](?P<open>_)(?P<content>(?s).+?)(?P<close>_)"#).unwrap(),
        open_tag: "<rustc_italic>",
        close_tag: "</rustc_italic>",
        start_fn: || println!("start_fn called for ITALIC"),
        end_fn: || println!("end_fn called for ITALIC"),
    };

    // List item: a line starting with `-` or `*` plus one whitespace character;
    // the rest of the line is the item text.
    static ref LIST: MdReplace = MdReplace {
        re: Regex::new(r#"(?m)^(?P<open>[-*]\s)(?P<content>.*?)(?P<close>)$"#).unwrap(),
        open_tag: "<rustc_li>",
        close_tag: "</rustc_li>",
        start_fn: || println!("start_fn called for LIST"),
        end_fn: || println!("end_fn called for LIST"),
    };

    // Every construct, in the order the replacements are applied.
    // Note: the only important ordering is that `CODE_BLOCK` must come before
    // `CODE_INLINE` because they could overlap otherwise.
    static ref MD_KEY_LIST: [&'static MdReplace; 9] = [
        &CODE_BLOCK,
        &CODE_INLINE,
        &HEADING1,
        &HEADING2,
        &HEADING3,
        &HEADING4,
        &BOLD,
        &ITALIC,
        &LIST,
    ];
}
/// Replace markdown formatters with asymmetric tags
fn md_replace_tags(s: &str) -> String {
// Regex isn't super performant here because we reallocate for each
// pattern if we do replace something. However, our outputs are short
// and most things won't be matched, so it isn't too bad here.
let mut res = s.to_owned();
for md_key in MD_KEY_LIST.iter() {
res = md_key.perform_replace(&res).into_owned();
}
res
}
fn perform_tagged_callbacks(s: &str) -> Vec<u8> {
let mut ret: Vec<u8> = Vec::new();
let mut skip = 0usize;
let input = s.as_bytes();
'str_iter: for i in 0..input.len() {
if skip > 0 {
skip -= 1;
continue;
}
let remaining_slice = &input[i..input.len()];
'md_key_iter: for md_key in MD_KEY_LIST.iter() {
if remaining_slice.starts_with(md_key.open_tag.as_bytes()) {
(md_key.start_fn)();
ret.extend_from_slice(b"[o]"); // Remove for real use
skip = md_key.open_tag.as_bytes().len() - 1;
continue 'str_iter;
}
if remaining_slice.starts_with(md_key.close_tag.as_bytes()) {
(md_key.end_fn)();
ret.extend_from_slice(b"[c]"); // Remove for real use
skip = md_key.close_tag.as_bytes().len() - 1;
continue 'str_iter;
}
}
// println!("{ret:?}");
ret.push(input[i]);
}
ret
}
/// Demo driver: prints the sample input, the tagged intermediate form, and
/// the output produced by the callback pass.
fn main() {
    // Stage 1: the raw markdown sample.
    println!("======== Start ========\n{MD_INPUT}\n=======================\n\n\n");

    // Stage 2: markdown delimiters rewritten to asymmetric tags.
    let rep = md_replace_tags(MD_INPUT);
    println!("======= Replaced ======\n{rep}\n=======================\n\n\n");

    // Stage 3: tags consumed, callbacks fired, placeholders emitted.
    let callback_bytes = perform_tagged_callbacks(&rep);
    let out_s = std::str::from_utf8(&callback_bytes).unwrap();
    println!("========= Out =========\n{out_s}\n=======================\n");
}
/// Sample markdown fed to `md_replace_tags` by `main` and by the unit test.
/// It contains two headings, inline code, a fenced code block, bold text,
/// italics spanning two lines, and a two-item list.
const MD_INPUT: &str = r#"
# Head1
Some `inline code`
```
code block here;
more code;
```
Further `inline`, some **bold**, a bit of _italics
wrapped across lines_.
Let's end with a list:
- Item1
- Item2
## Heading 2: Things we won't cover
"#;
/// Expected result of `md_replace_tags(MD_INPUT)`, compared in the unit test.
/// The space before `<rustc_italic>` is absent because the `ITALIC` pattern
/// matches the character preceding the opening `_` and the replacement does
/// not re-emit it.
const MD_OUTPUT: &str = r#"
<rustc_h1>Head1</rustc_h1>
Some <rustc_codeinline>inline code</rustc_codeinline>
<rustc_codeblock>
code block here;
more code;
</rustc_codeblock>
Further <rustc_codeinline>inline</rustc_codeinline>, some <rustc_bold>bold</rustc_bold>, a bit of<rustc_italic>italics
wrapped across lines</rustc_italic>.
Let's end with a list:
<rustc_li>Item1</rustc_li>
<rustc_li>Item2</rustc_li>
<rustc_h2>Heading 2: Things we won't cover</rustc_h2>
"#;
#[cfg(test)]
mod tests {
    use super::*;

    /// The sample document must be rewritten to exactly the expected tagged text.
    #[test]
    fn test_replace() {
        let tagged = md_replace_tags(MD_INPUT);
        assert_eq!(MD_OUTPUT, tagged);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment