Skip to content

Instantly share code, notes, and snippets.

@Geal
Created February 15, 2019 18:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Geal/84775215be3b4d5978173165373c7dbb to your computer and use it in GitHub Desktop.
Save Geal/84775215be3b4d5978173165373c7dbb to your computer and use it in GitHub Desktop.
#[macro_use]
extern crate nom;
#[macro_use]
extern crate bencher;
extern crate fnv;
use fnv::FnvHashMap as HashMap;
use bencher::{Bencher, black_box};
use nom::{digit, be_u32, IResult, Err, ErrorKind, InputTakeAtPosition, Convert, recognize_float,
ParseTo, Slice, InputLength, Needed,HexDisplay};
// `first`: runs nom's `digit` parser and hands the bytes it matched to
// `parse_to!(u32)` through `flat_map!`, producing a `u32`.
named!(first<u32>, flat_map!(digit, parse_to!(u32)));
// `second`: delegates to nom's `be_u32` parser (big-endian `u32`, going by the
// nom naming convention).
named!(second<u32>, call!(be_u32));
fn or<'b, I: Clone, O, E>(input: I, fns: &'b[&'b Fn(I) -> IResult<I, O, E>]) -> IResult<I, O, E> {
let mut index = 0;
for f in fns.iter() {
match f(input.clone()) {
Err(Err::Error(_)) => {},
rest => return rest,
}
}
Err(Err::Error(error_position!(input, ErrorKind::Alt)))
}
/// Runs `first`, then `sep`, then `second` one after another and returns the
/// outputs of `first` and `second` as a pair; the output of `sep` is dropped.
///
/// The first error raised by any of the three parsers is returned unchanged.
fn separated<I: Clone, O1, O2, O3, E, F, G, H>(input: I, first: F, sep: G, second: H) -> IResult<I, (O1, O3), E>
where
    F: Fn(I) -> IResult<I, O1, E>,
    G: Fn(I) -> IResult<I, O2, E>,
    H: Fn(I) -> IResult<I, O3, E>,
{
    let (after_first, left) = first(input)?;
    let (after_sep, _) = sep(after_first)?;
    let (rest, right) = second(after_sep)?;
    Ok((rest, (left, right)))
}
/// Runs `first`, `sep` and `second` in sequence and keeps only the output of
/// the middle parser (`sep`); the outputs of `first` and `second` are dropped.
///
/// The first error raised by any of the three parsers is returned unchanged.
fn delimited<I: Clone, O1, O2, O3, E, F, G, H>(input: I, first: F, sep: G, second: H) -> IResult<I, O2, E>
where
    F: Fn(I) -> IResult<I, O1, E>,
    G: Fn(I) -> IResult<I, O2, E>,
    H: Fn(I) -> IResult<I, O3, E>,
{
    let (after_open, _) = first(input)?;
    let (after_body, body) = sep(after_open)?;
    let (rest, _) = second(after_body)?;
    Ok((rest, body))
}
/// Splits `input` after its longest prefix whose elements all satisfy `cond`.
///
/// `split_at_position` cuts at the first element for which its predicate
/// returns `true`, so the predicate handed to it has to be the negation of
/// `cond`. Passing `cond` unchanged would consume the elements that do NOT
/// satisfy it. nom's own `take_while!` macro negates in the same way.
///
/// NOTE(review): when every element of `input` satisfies `cond`, the result is
/// whatever `split_at_position` reports when it finds no split point (nom
/// documents `Incomplete` for streaming inputs) — confirm for the input types
/// used here.
fn take_while<'a, T: 'a, F>(input: &'a [T], cond: F) -> IResult<&'a [T], &'a [T]>
where
    F: Fn(T) -> bool,
    &'a [T]: nom::InputTakeAtPosition<Item = T>,
{
    input.split_at_position(|item| !cond(item))
}
/// Applies the parser `first` to `input` and transforms its output with
/// `second`, leaving the remaining input untouched.
///
/// An error from `first` is returned as is; `second` is not called then.
fn map<I, O1, O2, F, G>(input: I, first: F, second: G) -> IResult<I, O2>
where
    F: Fn(I) -> IResult<I, O1>,
    G: Fn(O1) -> O2,
{
    let (rest, parsed) = first(input)?;
    Ok((rest, second(parsed)))
}
/// Applies `first` to `input`, then runs `second` on the *output* of `first`.
///
/// The remaining input reported on success is the one left by `first`; whatever
/// `second` leaves unconsumed is discarded. An error from `second` is expressed
/// over `O1`, so it is converted to the outer input type with `Err::convert`.
fn flat_map<I: Clone+From<O1>, O1, O2, F, G>(input: I, first: F, second: G) -> IResult<I, O2>
where
    F: Fn(I) -> IResult<I, O1>,
    G: Fn(O1) -> IResult<O1, O2>,
{
    let (rest, matched) = first(input)?;
    match second(matched) {
        Ok((_, converted)) => Ok((rest, converted)),
        Err(inner) => Err(Err::convert(inner)),
    }
}
fn many0<I: Clone+InputLength, O, F>(input: I, mut f: F) -> IResult<I, Vec<O>>
where F: FnMut(I) -> IResult<I, O> {
let mut i = input;
let mut acc = Vec::new();
loop {
let i_ = i.clone();
match f(i_) {
Err(_) => return Ok((i, acc)),
Ok((i2, o)) => {
if i.input_len() == i2.input_len() {
return Err(Err::Error(error_position!(i, ErrorKind::Many0)))
}
i = i2;
acc.push(o);
if i.input_len() == 0 {
return Ok((i, acc));
}
}
}
}
}
fn separated_list<I: Clone+InputLength, O, O2, F, G>(input: I, mut sep: G, mut f: F) -> IResult<I, Vec<O>>
where F: FnMut(I) -> IResult<I, O>,
G: FnMut(I) -> IResult<I, O2> {
let mut acc = Vec::new();
let (input, o) = f(input)?;
acc.push(o);
let mut i = input;
loop {
if i.input_len() == 0 {
return Ok((i, acc));
}
let i_ = i.clone();
match sep(i_) {
Err(_) => return Ok((i, acc)),
Ok((i2, _)) => {
if i.input_len() == i2.input_len() {
return Err(Err::Error(error_position!(i, ErrorKind::Many0)))
}
let i2_ = i2.clone();
match f(i2_) {
Err(_) => return Ok((i, acc)),
Ok((i3, o)) => {
if i2.input_len() == i3.input_len() {
return Err(Err::Error(error_position!(i, ErrorKind::Many0)))
}
i = i3;
acc.push(o);
}
}
}
}
}
}
//fn char<F>(c: char) -> F
// where F: Fn(&[u8]) -> IResult<&[u8], char> {
fn char(c: char) -> impl Fn(&[u8]) -> IResult<&[u8], char> {
move |i:&[u8]| {
if i.len() == 0 {
Err(Err::Incomplete(Needed::Unknown))
} else {
//beware of utf8
if i[0] as char == c {
Ok((&i[1..], c))
} else {
Err(Err::Error(error_position!(i, ErrorKind::Char)))
}
}
}
}
/// Builds a parser closure that applies nom's `tag!` macro with the byte
/// string `t` to its input.
///
/// The bound `'a: 'b` requires `t` to live at least as long as the input the
/// closure is applied to.
fn tag<'b, 'a: 'b>(t: &'a [u8]) -> impl Fn(&'b [u8]) -> IResult<&'b [u8], &'b [u8]> {
move |i:&'b [u8]| {
tag!(i, t)
}
}
/// Runs the parser `f` on `input` and, if it succeeds, replaces its output
/// with the fixed value `o`. An error from `f` is returned unchanged.
fn value<I, O1, O2, F>(input: I, f: F, o: O2) -> IResult<I, O2>
where
    F: Fn(I) -> IResult<I, O1>,
{
    let (rest, _) = f(input)?;
    Ok((rest, o))
}
/****************************/
/// Small demo of `or`: tries the `first` parser, then the `second` parser, on
/// `input` and returns the first outcome that is not a recoverable error.
fn parser(input: &[u8]) -> IResult<&[u8], u32> {
or(input, &[&first, &second])
}
/// Returns `true` for every byte except the double quote (`"`) and the
/// backslash (`\`).
pub fn is_string_character(c: u8) -> bool {
    //FIXME: should validate unicode character
    match c {
        b'"' | b'\\' => false,
        _ => true,
    }
}
/// Returns `true` when `c` is a space, tab, carriage return or line feed.
pub fn is_space(c: u8) -> bool {
    match c {
        b' ' | b'\t' | b'\r' | b'\n' => true,
        _ => false,
    }
}
//named!(sp, take_while!(is_space));
/// Function-based counterpart of the macro parser shown above: calls the local
/// `take_while` helper on `input` with `is_space` as the predicate.
fn sp(input: &[u8]) -> IResult<&[u8], &[u8]> {
take_while(input, is_space)
}
/// Variant of `sp` whose predicate is a membership test against a fixed byte
/// set (space, tab, carriage return, line feed) instead of `is_space`.
fn sp2(input: &[u8]) -> IResult<&[u8], &[u8]> {
    const WHITESPACE: &[u8] = b" \t\r\n";
    take_while(input, |byte| WHITESPACE.contains(&byte))
}
//named!(float<f64>, flat_map!(recognize_float, parse_to!(f64)));
fn float<'a>(i: &'a [u8]) -> IResult<&'a [u8], f64> {
let second = |i: &'a [u8]| {
match i.parse_to() {
Some(o) => Ok((i.slice(i.input_len()..), o)),
None => Err(Err::Error(error_position!(i, ErrorKind::ParseTo)))
}
};
flat_map(i, recognize_float, second)
}
/// A parsed JSON value. String data is borrowed (lifetime `'a`) rather than
/// copied.
///
/// NOTE(review): there is no variant for JSON `null`.
#[derive(Debug, PartialEq)]
pub enum JsonValue<'a> {
/// A string, kept as a borrowed `&str`.
Str(&'a str),
/// `true` or `false`.
Boolean(bool),
/// A number, stored as `f64`.
Num(f64),
/// An ordered list of values.
Array(Vec<JsonValue<'a>>),
/// A key/value mapping; `HashMap` is the file-level alias for `fnv::FnvHashMap`.
Object(HashMap<&'a str, JsonValue<'a>>),
}
use std::str;
/// Parses the inside of a string literal (without the surrounding quotes).
///
/// Uses nom's `escaped!` with `take_while1!(is_string_character)` for ordinary
/// bytes, `'\\'` as the escape character and `one_of!("\"bfnrt\\")` as the set
/// of bytes allowed after it. The matched bytes are then converted with
/// `str::from_utf8` through `map_res!`.
///
/// NOTE(review): presumably the returned `&str` still contains the raw escape
/// sequences (nothing here unescapes them) — confirm against nom's `escaped!`.
fn parse_str(input: &[u8]) -> IResult<&[u8], &str> {
map_res!(input,
escaped!(take_while1!(is_string_character), '\\', one_of!("\"bfnrt\\")),
str::from_utf8
)
}
/// Parses a quoted string: a `"` character, the content accepted by
/// `parse_str`, and a closing `"`. Only the content is returned.
fn string(input: &[u8]) -> IResult<&[u8], &str> {
delimited(input, char('\"'), parse_str, char('\"'))
}
/// Parses a boolean literal: tries the tag `false` first, then the tag `true`,
/// and maps the one that matched to the corresponding `bool` via `value`.
fn boolean(input: &[u8]) -> IResult<&[u8], bool> {
or(input, &[
&|i| { value(i, tag(&b"false"[..]), false) },
&|i| { value(i, tag(&b"true"[..]), true) }
])
}
/// Parses an array: `[`, then `json_value` elements separated by `,` (via
/// `separated_list`), then `]`. Returns the elements in order.
///
/// NOTE(review): no whitespace is skipped around the brackets, the commas or
/// the elements in this function.
fn array(input: &[u8]) -> IResult<&[u8], Vec<JsonValue>> {
delimited(input,
char('['),
|i| separated_list(i, char(','), json_value),
char(']')
)
}
/// Parses one object entry: a quoted `string` key, a `:` and a `json_value`.
/// Returns the key and the value as a pair.
fn key_value(input: &[u8]) -> IResult<&[u8], (&str, JsonValue)> {
separated(input, string, char(':'), json_value)
}
fn hash_internal(input: &[u8]) -> nom::IResult<&[u8], HashMap<&str, JsonValue>> {
match key_value(input) {
Err(nom::Err::Error(_)) => Ok((input, HashMap::default())),
Err(e) => Err(e),
Ok((i, (key, value))) => {
let mut map = HashMap::default();
map.insert(key, value);
let mut input = i;
loop {
match do_parse!(input, sp >> char!(',') >> kv: key_value >> (kv)) {
Err(nom::Err::Error(_)) => break Ok((input, map)),
Err(e) => break Err(e),
Ok((i, (key, value))) => {
map.insert(key, value);
input = i;
}
}
}
}
}
}
// `hash`: parses an object as `{`, then the entries accepted by
// `hash_internal` (wrapped in nom's `return_error!`), then `}` optionally
// preceded by whatever `sp` consumes. Yields the map built by `hash_internal`.
named!(
hash<HashMap<&str, JsonValue>>,
delimited!(
char!('{'),
return_error!(
hash_internal
),
preceded!(sp, char!('}'))
)
);
/// Parses any JSON value by trying, in this order: string, number, array,
/// object, boolean. The first alternative that does not fail recoverably
/// decides the result, wrapped in the matching `JsonValue` variant.
///
/// NOTE(review): unlike the commented-out macro version further down in this
/// file, no leading whitespace is skipped here.
fn json_value(input: &[u8]) -> IResult<&[u8], JsonValue> {
or(input, &[
&|i| { map(i, string, JsonValue::Str) },
&|i| { map(i, float, JsonValue::Num) },
&|i| { map(i, array, JsonValue::Array) },
&|i| { map(i, hash, JsonValue::Object) },
&|i| { map(i, boolean, JsonValue::Boolean) },
])
}
/// Entry point of the parser: the top-level value must be an array (tried
/// first) or an object.
fn root(input: &[u8]) -> IResult<&[u8], JsonValue> {
or(input, &[
&|i| { map(i, array, JsonValue::Array) },
&|i| { map(i, hash, JsonValue::Object) },
])
}
/*
named!(
value<JsonValue>,
preceded!(sp, alt!(
map!(string, JsonValue::Str) |
map!(float, JsonValue::Num) |
map!(array, JsonValue::Array) |
map!(hash, JsonValue::Object) |
map!(boolean, JsonValue::Boolean)
))
);
named!(
root<JsonValue>,
delimited!(
call!(sp),
alt!(
map!(hash, JsonValue::Object) |
map!(array, JsonValue::Array)
),
not!(complete!(sp))
)
);
*/
/// Exercises `many0` with a closure that matches the tag `abcd` and counts how
/// many times it is invoked; the count is printed before the parse result is
/// returned.
fn test_many(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    let mut calls = 0;
    let parsed = many0(input, |chunk| {
        calls += 1;
        tag!(chunk, "abcd")
    });
    println!("counter: {}", calls);
    parsed
}
/// Checks that `test_many` splits three consecutive `abcd` tags and consumes
/// the whole input.
///
/// The counter printed by `test_many` is hidden for passing tests; run
/// `cargo test -- --nocapture` to see it.
#[test]
fn manytest() {
    let chunk = &b"abcd"[..];
    assert_eq!(
        test_many(&b"abcdabcdabcd"[..]),
        Ok((&b""[..], vec![chunk, chunk, chunk]))
    );
}
/// Benchmark case: a small JSON document with a number, an array and a nested
/// object. Records the document size in `b.bytes` and hands it to `parse`.
fn basic(b: &mut Bencher) {
    let data: &[u8] = b"{\"a\":42,\"b\":[\"x\",\"y\",12],\"c\":{\"hello\":\"world\"}}";
    b.bytes = data.len() as u64;
    parse(b, data)
}
fn parse<'a>(b: &mut Bencher, buffer: &'a[u8]) {
assert!(root(buffer).is_ok());
b.iter(|| {
let mut buf = black_box(buffer);
match root(buf) {
Ok((i, o)) => {
return o;
}
Err(err) => {
if let &nom::Err::Error(nom::Context::Code(ref i, ref e)) = &err {
panic!("got err {:?} at:\n{}", e, i.to_hex(16));
} else {
panic!("got err: {:?}", err)
}
},
}
});
}
// Registers the `basic` benchmark in a group named `json` and generates the
// benchmark binary's entry point for that group.
benchmark_group!(json, basic);
benchmark_main!(json);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment