Created
June 16, 2024 16:12
-
-
Save Krish120003/369a892ba7189d3b91b91845e60a1ffa to your computer and use it in GitHub Desktop.
A JSON Parser in 500 Lines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::collections::HashMap; | |
/// Errors reported by the JSON parsing functions in this file.
///
/// Where a variant carries a `usize`, it is the size of the input the failing
/// parser still had in front of it when it gave up (the call sites pass the
/// length of their remaining slice).
#[derive(Debug, Clone, PartialEq, Eq)]
enum JSONParseError {
    /// A failure that fits none of the more specific variants.
    Error(usize),
    /// The parser that was tried does not apply at this position; callers use
    /// this as the signal to try a different alternative.
    NotFound,
    /// A character was found that is not allowed at this position.
    UnexpectedChar(usize),
    /// A closing delimiter (`"`, `]` or `}`) was expected but not found.
    MissingClosing(usize),
}

impl std::fmt::Display for JSONParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            JSONParseError::Error(left) => {
                write!(f, "invalid JSON (remaining input: {})", left)
            }
            JSONParseError::NotFound => write!(f, "no JSON value found"),
            JSONParseError::UnexpectedChar(left) => {
                write!(f, "unexpected character (remaining input: {})", left)
            }
            JSONParseError::MissingClosing(left) => {
                write!(f, "missing closing delimiter (remaining input: {})", left)
            }
        }
    }
}

impl std::error::Error for JSONParseError {}
/// One node of a parsed JSON document.
#[derive(Clone, Debug, PartialEq)]
enum JSONValue {
    /// The literal `null`.
    Null,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// Any JSON number, stored as a double-precision float.
    Number(f64),
    /// A string with its escape sequences already decoded.
    String(String),
    /// An ordered list of values.
    Array(Vec<Self>),
    /// A key/value mapping; key order is not preserved.
    Object(HashMap<String, Self>),
}
/// Skips leading JSON whitespace (space, line feed, tab, carriage return) and
/// returns whatever follows it.
fn ws(src: &str) -> &str {
    src.trim_start_matches(|c: char| matches!(c, ' ' | '\n' | '\t' | '\r'))
}
| fn string(mut src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| match src.strip_prefix("\"") { | |
| Some(rest) => src = rest, | |
| None => return Err(JSONParseError::NotFound), | |
| }; | |
| // now we keep going until we find the first " | |
| // lets just "find" the first " | |
| let mut result: String = "".to_string(); | |
| let mut escaping = false; | |
| let mut chars = src.chars(); | |
| loop { | |
| let c = match chars.next() { | |
| Some(c) => c, | |
| None => return Err(JSONParseError::MissingClosing(src.len())), | |
| }; | |
| // if we have the \, then we are escaping, but don't add anything to result | |
| if c == '\\' && !escaping { | |
| escaping = true; | |
| } | |
| // if we have the end quote but we are not escaping, then we are done | |
| else if c == '"' && !escaping { | |
| break; | |
| } else if escaping { | |
| // if we are escaping, then we need to check for special characters | |
| match c { | |
| '"' => result.push('"'), // quotation mark | |
| '\\' => result.push('\\'), // reverse solidus | |
| '/' => result.push('/'), // solidus | |
| 'b' => result.push('\u{0008}'), // backspace | |
| 'f' => result.push('\u{000c}'), // form feed | |
| 'n' => result.push('\n'), // line feed | |
| 'r' => result.push('\r'), // carriage return | |
| 't' => result.push('\t'), // tab | |
| _ => { | |
| // can't escape whatever this is | |
| return Err(JSONParseError::UnexpectedChar(chars.count())); | |
| } | |
| } | |
| escaping = false; | |
| } else { | |
| result.push(c); | |
| } | |
| } | |
| Ok((chars.as_str(), JSONValue::String(result))) | |
| } | |
| // numbers are weird | |
| fn onenine(src: &str) -> Result<(&str, char), JSONParseError> { | |
| // check the first character of the string | |
| match src.chars().next() { | |
| // if the character exists | |
| Some(c) => { | |
| // check if it is numeric | |
| if c.is_numeric() { | |
| // if it is, we have to make sure it's not 0 | |
| if c == '0' { | |
| return Err(JSONParseError::NotFound); | |
| } | |
| Ok((&src[1..], c)) | |
| } else { | |
| Err(JSONParseError::NotFound) | |
| } | |
| } | |
| None => Err(JSONParseError::NotFound), | |
| } | |
| } | |
| fn digit(src: &str) -> Result<(&str, char), JSONParseError> { | |
| // check the first character of the string | |
| match src.chars().next() { | |
| // if the character exists | |
| Some('0') => Ok((&src[1..], '0')), | |
| Some(_) => onenine(src), | |
| None => Err(JSONParseError::NotFound), | |
| } | |
| } | |
| fn digits(mut src: &str) -> Result<(&str, Vec<char>), JSONParseError> { | |
| let mut res = vec![]; | |
| loop { | |
| match digit(src) { | |
| Ok((rest, c)) => { | |
| src = rest; | |
| res.push(c); | |
| } | |
| Err(_) => { | |
| break; | |
| } | |
| } | |
| } | |
| if res.is_empty() { | |
| return Err(JSONParseError::NotFound); | |
| } | |
| Ok((src, res)) | |
| } | |
| fn integer(mut src: &str) -> Result<(&str, i64), JSONParseError> { | |
| // first check for negative symbol. | |
| let negative; | |
| match src.strip_prefix("-") { | |
| Some(rest) => { | |
| src = rest; | |
| negative = true; | |
| } | |
| None => { | |
| negative = false; | |
| } | |
| } | |
| // try to parse onenine, then digits | |
| match onenine(src) { | |
| Ok((rest, c)) => match digits(rest) { | |
| Ok((leftover, mut digis)) => { | |
| digis.insert(0, c); | |
| let int_str: String = digis.iter().collect(); | |
| let mut resulting_int: i64 = int_str.parse::<i64>().unwrap(); | |
| if negative { | |
| resulting_int *= -1; | |
| } | |
| return Ok((leftover, resulting_int)); | |
| } | |
| Err(_) => {} | |
| }, | |
| Err(_) => {} | |
| } | |
| match digit(src) { | |
| Ok((rest, c)) => { | |
| let mut n: i64 = c.to_digit(10).unwrap().into(); | |
| if negative { | |
| n *= -1; | |
| } | |
| Ok((rest, n)) | |
| } | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn fraction(src: &str) -> Result<(&str, f64), JSONParseError> { | |
| match src.strip_prefix(".") { | |
| Some(rest) => match digits(rest) { | |
| Ok((leftover, mut digis)) => { | |
| digis.insert(0, '.'); | |
| digis.insert(0, '0'); | |
| let fraction_str: String = digis.iter().collect(); | |
| let fraction_part = fraction_str.parse::<f64>().unwrap(); | |
| return Ok((leftover, fraction_part)); | |
| } | |
| Err(e) => Err(e), | |
| }, | |
| None => Ok((src, 0.0)), | |
| } | |
| } | |
| fn exponent(mut src: &str) -> Result<(&str, i64), JSONParseError> { | |
| let first_char = src.chars().next(); | |
| if first_char == Some('e') || first_char == Some('E') { | |
| src = &src[1..]; | |
| } else { | |
| return Ok((src, 0)); | |
| } | |
| let mut negative = false; | |
| let sign_char = src.chars().next(); | |
| if sign_char == Some('+') { | |
| // do nothing and skip | |
| src = &src[1..]; | |
| } else if sign_char == Some('-') { | |
| negative = true; | |
| src = &src[1..]; | |
| } | |
| // ok now digits | |
| match digits(src) { | |
| Ok((rest, digis)) => { | |
| let num_str: String = digis.iter().collect(); | |
| let mut num: i64 = num_str.parse::<i64>().unwrap(); | |
| if negative { | |
| num *= -1; | |
| } | |
| return Ok((rest, num)); | |
| } | |
| Err(e) => return Err(e), | |
| }; | |
| } | |
| fn number(mut src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| let mut result; | |
| let negative; | |
| match integer(src) { | |
| Ok((rest, num)) => { | |
| result = num.abs() as f64; | |
| negative = num.is_negative(); | |
| src = rest; | |
| } | |
| Err(e) => return Err(e), | |
| }; | |
| match fraction(src) { | |
| Ok((rest, frac)) => { | |
| result += frac; | |
| src = rest; | |
| } | |
| Err(JSONParseError::NotFound) => {} | |
| Err(e) => return Err(e), | |
| } | |
| match exponent(src) { | |
| Ok((rest, exponent)) => { | |
| src = rest; | |
| let multipier = 10_f64.powf(exponent as f64); | |
| result *= multipier; | |
| } | |
| Err(JSONParseError::NotFound) => {} | |
| Err(e) => return Err(e), | |
| } | |
| if negative { | |
| result *= -1.0; | |
| } | |
| Ok((src, JSONValue::Number(result))) | |
| } | |
| fn bool(src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| match src.strip_prefix("true") { | |
| Some(rest) => Ok((rest, JSONValue::True)), | |
| None => match src.strip_prefix("false") { | |
| Some(rest) => Ok((rest, JSONValue::False)), | |
| None => Err(JSONParseError::NotFound), | |
| }, | |
| } | |
| } | |
| fn null(src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| match src.strip_prefix("null") { | |
| Some(rest) => Ok((rest, JSONValue::Null)), | |
| None => Err(JSONParseError::NotFound), | |
| } | |
| } | |
| fn value(src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| match object(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), | |
| } | |
| match array(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), // if any other error, propogate it up | |
| } | |
| match string(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), // if any other error, propogate it up | |
| } | |
| match number(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), // if any other error, propogate it up | |
| } | |
| match bool(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), // if any other error, propogate it up | |
| }; | |
| match null(src) { | |
| Ok(res) => return Ok(res), | |
| Err(JSONParseError::NotFound) => {} // if not found, that ok | |
| Err(e) => return Err(e), // if any other error, propogate it up | |
| }; | |
| Err(JSONParseError::NotFound) | |
| } | |
| fn element(mut src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| src = ws(src); | |
| match value(src) { | |
| Ok((rest, v)) => Ok((ws(rest), v)), | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn elements(mut src: &str) -> Result<(&str, Vec<JSONValue>), JSONParseError> { | |
| let mut values = vec![]; | |
| loop { | |
| match element(src) { | |
| Ok((rest, v)) => { | |
| src = rest; | |
| values.push(v); | |
| } | |
| Err(e) => return Err(e), | |
| } | |
| // now we wanna consume the first character of src | |
| // if it is a comma, or break otherwise | |
| if src.chars().next() == Some(',') { | |
| src = &src[1..]; | |
| } else { | |
| break; | |
| } | |
| } | |
| Ok((src, values)) | |
| } | |
| fn array(mut src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| // first we must parse the [] character | |
| match src.strip_prefix("[") { | |
| Some(rest) => src = ws(rest), | |
| None => return Err(JSONParseError::NotFound), | |
| }; | |
| // if this is true... then we have just parsed whitespace and there are no elements. | |
| // thus, return empty array | |
| if src.chars().next() == Some(']') { | |
| src = &src[1..]; | |
| return Ok((src, JSONValue::Array(vec![]))); | |
| } | |
| // otherwise, parse elemnts and return that | |
| match elements(src) { | |
| Ok((src, v)) => { | |
| if src.chars().next() == Some(']') { | |
| Ok((&src[1..], JSONValue::Array(v))) | |
| } else { | |
| Err(JSONParseError::MissingClosing(src.len())) | |
| } | |
| } | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn object(mut src: &str) -> Result<(&str, JSONValue), JSONParseError> { | |
| // first we must parse the [] character | |
| match src.strip_prefix("{") { | |
| Some(rest) => src = ws(rest), | |
| None => return Err(JSONParseError::NotFound), | |
| }; | |
| // if this is true... then we have just parsed whitespace and there are no elements. | |
| // thus, return empty array | |
| if src.chars().next() == Some('}') { | |
| src = &src[1..]; | |
| // TODO: | |
| return Ok((src, JSONValue::Object(HashMap::new()))); | |
| } | |
| // otherwise, parse elemnts and return that | |
| match members(src) { | |
| Ok((src, v)) => { | |
| if src.chars().next() == Some('}') { | |
| let mut map: HashMap<String, JSONValue> = HashMap::new(); | |
| v.iter().for_each(|(key, value)| { | |
| map.insert(key.to_owned(), value.to_owned()); | |
| }); | |
| Ok((&src[1..], JSONValue::Object(map))) | |
| } else { | |
| Err(JSONParseError::MissingClosing(src.len())) | |
| } | |
| } | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn members(mut src: &str) -> Result<(&str, Vec<(String, JSONValue)>), JSONParseError> { | |
| let mut values = vec![]; | |
| loop { | |
| match member(src) { | |
| Ok((rest, v)) => { | |
| src = rest; | |
| values.push(v); | |
| } | |
| Err(e) => return Err(e), | |
| } | |
| // now we wanna consume the first character of src, if it is a comma | |
| // or break otherwise | |
| if src.chars().next() == Some(',') { | |
| src = &src[1..]; | |
| } else { | |
| break; | |
| } | |
| } | |
| Ok((src, values)) | |
| } | |
| fn member(mut src: &str) -> Result<(&str, (String, JSONValue)), JSONParseError> { | |
| src = ws(src); | |
| match string(src) { | |
| Ok((rest, JSONValue::String(key))) => { | |
| src = rest; | |
| src = ws(src); | |
| // now expect a ":" | |
| if src.chars().next() == Some(':') { | |
| src = &src[1..]; | |
| match element(src) { | |
| Ok((rest, el)) => return Ok((rest, (key, el))), | |
| Err(e) => return Err(e), | |
| } | |
| } else { | |
| return Err(JSONParseError::UnexpectedChar(src.len())); | |
| } | |
| } | |
| Ok((_, _)) => Err(JSONParseError::Error(src.len())), | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn parse(src: &str) -> Result<JSONValue, JSONParseError> { | |
| match element(src) { | |
| Ok((_, res)) => Ok(res), | |
| Err(e) => Err(e), | |
| } | |
| } | |
| fn main() { | |
| let src = "{ \"Hello\": [5, true, false, null, \"World\"] }"; | |
| let res = parse(&src); | |
| match res { | |
| Ok(v) => println!("{:?}", v), | |
| Err(e) => println!("{:?}", e), | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment