Last active
June 5, 2023 08:41
-
-
Save Perigord-Kleisli/4ccfb690fbaf2353d2d786df44da99c5 to your computer and use it in GitHub Desktop.
json parsers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
/// A minimal functor abstraction: a value built around `T` that can be mapped
/// into the corresponding value built around `U`.
trait Functor<T, U> {
    /// The type produced by mapping.
    type Out;

    /// Maps `f` over the contained value, consuming `self`.
    ///
    /// `f` receives the value by reference and must be `Clone + 'static`.
    fn fmap<F>(self, f: F) -> Self::Out
    where
        F: FnOnce(&T) -> U + Clone + 'static;
}
/// A parser that consumes a prefix of its input and produces a `T`.
///
/// The wrapped function returns `Some((remaining_input, value))` on success
/// and `None` on failure.
struct Parser<T> {
    parser: Box<dyn Fn(&String) -> Option<(String, T)>>,
}

impl<T> Parser<T>
where
    T: 'static + Clone,
{
    /// Wraps a boxed parsing function.
    fn new(parser: Box<dyn Fn(&String) -> Option<(String, T)>>) -> Self {
        Self { parser }
    }

    /// Runs the parser against `source`.
    ///
    /// The `&String` parameter type is kept so existing callers and stored
    /// closures continue to type-check.
    fn run_parser(&self, source: &String) -> Option<(String, T)> {
        (self.parser)(source)
    }

    // Applicative functions (there is no Applicative trait here, so they are
    // inherent methods).

    /// A parser that consumes nothing and always yields a clone of `t`.
    fn pure(t: T) -> Parser<T> {
        Parser::new(Box::new(move |input| Some((input.clone(), t.clone()))))
    }

    /// Runs `self`, then `p` on what is left, and combines both results with `f`.
    /// Fails if either parser fails.
    fn lift_a2<F: 'static, U: 'static + Clone, V: 'static + Clone>(
        self,
        p: Parser<U>,
        f: F,
    ) -> Parser<V>
    where
        F: FnOnce(T, U) -> V + Copy,
    {
        Parser::new(Box::new(move |input| {
            let (after_first, a) = self.run_parser(input)?;
            let (after_second, b) = p.run_parser(&after_first)?;
            Some((after_second, f(a, b)))
        }))
    }

    /// Runs every parser in order, each on the input left over by the previous
    /// one, and collects the produced characters into a `String`.
    ///
    /// Not polymorphic like the Haskell version: it is specialised to `char`
    /// parsers producing a `String`.
    fn sequence(parsers: Vec<Parser<char>>) -> Parser<String> {
        Parser::new(Box::new(move |input| {
            let mut remaining = input.clone();
            let mut matched = String::with_capacity(parsers.len());
            for parser in &parsers {
                // Use what the parser actually produced and left over, instead
                // of assuming it consumed exactly one character of the input.
                let (rest, c) = parser.run_parser(&remaining)?;
                matched.push(c);
                remaining = rest;
            }
            Some((remaining, matched))
        }))
    }

    /// Maps every character of `source` to a parser with `f` and sequences them.
    fn traverse<F>(f: F, source: String) -> Parser<String>
    where
        F: Fn(char) -> Parser<char>,
    {
        let parsers: Vec<Parser<char>> = source.chars().map(f).collect();
        Parser::<String>::sequence(parsers)
    }

    /// `<*`: runs both parsers in order and keeps the result of `self`.
    fn left_chain<U: 'static + Clone>(self, p: Parser<U>) -> Parser<T> {
        self.lift_a2(p, |x, _| x)
    }

    /// `*>`: runs both parsers in order and keeps the result of `p`.
    fn right_chain<U: Clone + 'static>(self, p: Parser<U>) -> Parser<U> {
        self.lift_a2(p, |_, y| y)
    }

    // Alternative functions (again inherent methods rather than a trait).

    /// Tries `self`; only if it fails, tries `p` on the same input.
    fn alternative(self, p: Parser<T>) -> Parser<T> {
        Parser::new(Box::new(move |input| {
            // `or_else` keeps the second parser lazy: it runs only on failure.
            self.run_parser(input).or_else(|| p.run_parser(input))
        }))
    }

    /// Applies `self` repeatedly until it fails, collecting every result.
    ///
    /// Always succeeds (possibly with an empty vector). Repetition also stops
    /// as soon as the parser succeeds without shrinking the input; such a
    /// result is not collected, which prevents an endless loop.
    fn many(self) -> Parser<Vec<T>> {
        Parser::new(Box::new(move |source| {
            let mut remaining = source.clone();
            let mut items: Vec<T> = Vec::new();
            while let Some((rest, item)) = self.run_parser(&remaining) {
                if rest.len() >= remaining.len() {
                    break;
                }
                remaining = rest;
                items.push(item);
            }
            Some((remaining, items))
        }))
    }
}
impl<T: 'static + Clone, U: 'static + Clone> Functor<T, U> for Parser<T> { | |
type Out = Parser<U>; | |
fn fmap<F: 'static + Clone>(self: Parser<T>, f: F) -> Self::Out | |
where | |
F: FnOnce(&T) -> U, | |
{ | |
Parser::new(Box::new(move |str| { | |
self.run_parser(str).map(|(a, b)| (a, f.clone()(&b))) | |
})) | |
} | |
} | |
fn char_parser(c: char) -> Parser<char> { | |
Parser::new(Box::new(move |input| { | |
let mut c_str = input.chars(); | |
if Some(c) == c_str.next() { | |
Some((c_str.collect(), c)) | |
} else { | |
None | |
} | |
})) | |
} | |
fn string_parser(source: String) -> Parser<String> { | |
Parser::<String>::traverse(char_parser, source) | |
} | |
fn span_parser<F: 'static + Clone>(predicate: F) -> Parser<String> | |
where F: Fn(&char) -> bool | |
{ | |
Parser::new(Box::new( move |input| | |
Some( | |
(|(a,b)| (b,a))( | |
span(input.to_string(), predicate.clone()) | |
)) | |
)) | |
} | |
// Currently skips the first element | |
fn separator_parser<T: Clone + 'static,U: Clone + 'static>(element: Parser<T>, separator: Parser<U>) -> Parser<Vec<T>> { | |
separator.right_chain(element).many() | |
} | |
fn string_literal() -> Parser<String> { | |
char_parser('"').right_chain(span_parser(|x| *x != '"')).left_chain(char_parser('"')) | |
} | |
fn whitespace() -> Parser<String> { | |
span_parser(|x| x.is_whitespace()) | |
} | |
/// Splits `s` into the longest prefix whose characters all satisfy
/// `predicate` and the remainder, returned as `(prefix, remainder)`.
///
/// The split point is a byte offset taken from `char_indices`, so the split
/// always lands on a character boundary even for multi-byte characters.
fn span<F>(s: String, predicate: F) -> (String, String)
where
    F: Fn(&char) -> bool,
{
    // Byte offset of the first character that fails the predicate; the whole
    // string matches when there is none.
    let split = s
        .char_indices()
        .find(|(_, c)| !predicate(c))
        .map_or(s.len(), |(index, _)| index);
    let (prefix, remainder) = s.split_at(split);
    (prefix.to_string(), remainder.to_string())
}
/// A parsed JSON value.
///
/// `PartialEq` is derived so parse results can be compared directly.
#[derive(Clone, Debug, PartialEq)]
enum Jsonvalue {
    /// The `null` literal.
    Null,
    /// `true` or `false`.
    Bool(bool),
    /// An integer stored as `i32`.
    Num(i32),
    /// The text of a string literal, without the surrounding quotes.
    String(String),
    /// An ordered list of values.
    Array(Vec<Jsonvalue>),
    /// A mapping from keys to values.
    Object(HashMap<String, Jsonvalue>),
}
fn json_null() -> Parser<Jsonvalue> { | |
string_parser("null".to_string()) | |
.fmap(|_| Jsonvalue::Null) | |
} | |
fn json_bool() -> Parser<Jsonvalue> { | |
string_parser("true".to_string()) | |
.fmap(|_| Jsonvalue::Bool(true)) | |
.alternative(string_parser("false".to_string()).fmap(|_| Jsonvalue::Bool(false))) | |
} | |
fn json_num() -> Parser<Jsonvalue> { | |
Parser::new(Box::new( |source| { | |
let (num,remains) = span(source.to_string(), |x| x.is_digit(10)); | |
match num.parse::<i32>() { | |
Ok(x) => Some((remains, Jsonvalue::Num(x))), | |
Err(_) => None | |
} | |
} | |
)) | |
} | |
fn json_string() -> Parser<Jsonvalue> { | |
char_parser('"') | |
.right_chain(span_parser(|x| x != &'"')) | |
.left_chain(char_parser('"')) | |
.fmap(|x| Jsonvalue::String(x.to_string())) | |
} | |
//I couldnt figure out how to correctly translate the haskell version too array and object | |
fn json_array() -> Parser<Jsonvalue> { | |
Parser::new(Box::new( |source|{ | |
let (res_1, _) = char_parser('[').right_chain(whitespace()).run_parser(source)?; | |
if let Some((res_2, mut head)) = json_parser().left_chain(whitespace()).fmap(|x| vec![x.clone()]).run_parser(&res_1) { | |
let (res_3, mut body) = separator_parser(json_parser(), whitespace().right_chain(char_parser(',').left_chain(whitespace()))).run_parser(&res_2)?; | |
let (res_4, _) = whitespace().left_chain(char_parser(']')).run_parser(&res_3)?; | |
head.append(&mut body); | |
Some((res_4, Jsonvalue::Array(head))) | |
} else { | |
let (res_2, _) = whitespace().left_chain(char_parser(']')).run_parser(&res_1)?; | |
Some((res_2, Jsonvalue::Array(vec![]))) | |
} | |
} | |
)) | |
} | |
fn json_object() -> Parser<Jsonvalue> { | |
Parser::new(Box::new( |source|{ | |
let (res_1, _) = char_parser('{').right_chain(whitespace()).run_parser(source)?; | |
if let Some((res_2, mut head)) = pair().left_chain(whitespace()).fmap(|x| vec![x.clone()]).run_parser(&res_1) { | |
let (res_3, mut body) = separator_parser(pair(), whitespace().right_chain(char_parser(',').left_chain(whitespace()))).run_parser(&res_2)?; | |
let (res_4, _) = whitespace().left_chain(char_parser('}')).run_parser(&res_3)?; | |
head.append(&mut body); | |
Some((res_4, | |
Jsonvalue::Object( | |
head.into_iter().collect() | |
)) | |
) | |
} else { | |
let (res_2, _) = whitespace().left_chain(char_parser('}')).run_parser(&res_1)?; | |
Some((res_2, | |
Jsonvalue::Object(HashMap::new()) | |
)) | |
} | |
} | |
)) | |
} | |
fn pair() -> Parser<(String,Jsonvalue)> { | |
Parser::new(Box::new( |source| { | |
let (res_1, key) = string_literal().run_parser(&source)?; | |
let (res_2, _) = whitespace().right_chain(char_parser(':')).left_chain(whitespace()).run_parser(&res_1)?; | |
let (res_3, value) = json_parser().run_parser(&res_2)?; | |
Some((res_3, (key,value))) | |
} | |
)) | |
} | |
//Doesnt work with newlines | |
fn json_parser() -> Parser<Jsonvalue> { | |
json_null() | |
.alternative(json_bool()) | |
.alternative(json_num()) | |
.alternative(json_string()) | |
.alternative(json_array()) | |
.alternative(json_object()) | |
} | |
fn main() -> () { | |
let parse_sample = String::from("asd las"); | |
let failing_parse_sample = String::from("bsc las"); | |
let pure_parse = Parser::pure('d'); | |
let char_parse = char_parser('a'); | |
let string_parse = string_parser("asd".to_string()); | |
println!( | |
r#" | |
Parse Example: {} | |
Failing Parse Example: {} | |
Normal Parsers: | |
pure_parser: {:?} | |
//NO WRONG PARSE | |
char_parser: {:?} | |
failed parse: {:?} | |
string_parser: {:?} | |
failed parse: {:?} | |
Json Parsers: | |
json_null("null"): {:?} | |
json_null("nll"): {:?} | |
json_num("234"): {:?} | |
json_num("a34"): {:?} | |
json_string(r\#""123""\#): {:?} | |
json_string(r\#"123""\#): {:?} | |
json_array("[1,1,true,null,\"asd\",[1,2,3]]"): {:?} | |
json_array("[]"): {:?} | |
json_array("[1,1,true,null,\"asd\",[1,2,3]"): {:?} | |
json_object("{{}}"): {:?} | |
json_object("{{ \"asd\": 1, \"bar\": \"asd\" \}}"): {:?} | |
"#, | |
parse_sample, | |
failing_parse_sample, | |
pure_parse.run_parser(&parse_sample), | |
char_parse.run_parser(&parse_sample), | |
char_parse.run_parser(&failing_parse_sample), | |
string_parse.run_parser(&parse_sample), | |
string_parse.run_parser(&failing_parse_sample), | |
json_null().run_parser(&"null".to_string()), | |
json_null().run_parser(&"nll".to_string()), | |
json_num().run_parser(&"234".to_string()), | |
json_num().run_parser(&"a34".to_string()), | |
json_string().run_parser(&"\"123\"".to_string()), | |
json_string().run_parser(&"123\"".to_string()), | |
json_array().run_parser(&"[1,1,true,null,\"asd\",[1,2,3]]".to_string()), | |
json_array().run_parser(&"[]".to_string()), | |
json_array().run_parser(&"[1,1,true,null,\"asd\",[1,2,3]".to_string()), | |
json_object().run_parser(&"{ \"asd\": 1, \"bar\": \"asd\" }".to_string()), | |
json_object().run_parser(&"{}".to_string()), | |
); | |
println!("Sample Json: \n"); | |
let sample_json = r#" | |
{ | |
"nullval": null, | |
"bool true": true, | |
"bool false": false, | |
"string": "Hello World", | |
"arrays": [1,2,true,"hello"], | |
"objects": { | |
"nullval": null, | |
"bool true": true, | |
"bool false": false, | |
"string": "Hello World", | |
"arrays": [1,2,true,"hello"] | |
} | |
} | |
"#; | |
println!("{}", sample_json); // I had to remove newlines for it to work | |
println!("{:?}", json_parser().run_parser(&sample_json.replace("\n", ""))); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//this does not have a json object parser and is incomplete | |
//if you figure out how to make a parser combinator | |
//in the same styles as the others here, | |
//please contact me so I may replace this | |
//monstrosity I have created | |
#include <cctype>
#include <cstdarg>
#include <cstdio>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <optional>
#include <ranges>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>
namespace ranges = std::ranges; | |
namespace views = std::ranges::views; | |
/* | |
* This was hell to create, why does std::function cause so much segfaults | |
* I've resorted to copy capturing every lambda just so I can actually build | |
* this | |
*/ | |
// Marks a parameter as intentionally unused. On GNU-compatible compilers the
// unused attribute is attached; elsewhere the macro still expands to a valid
// (renamed) parameter so code using it keeps compiling.
#ifdef __GNUC__
#define UNUSED(x) UNUSED_##x __attribute__((__unused__))
#else
#define UNUSED(x) UNUSED_##x
#endif
// Prints a vector as "[a,b,c]" ("[]" when empty).
// Takes the vector by const reference so printing does not copy it.
template <typename T>
std::ostream &operator<<(std::ostream &stream, const std::vector<T> &xs) {
  stream << "[";
  for (size_t i = 0; i < xs.size(); i++) {
    if (i != 0)
      stream << ",";
    stream << xs[i];
  }
  return stream << "]";
}
// Prints a pair as "(first,second)".
// Takes the pair by const reference so printing does not copy it.
template <typename T, typename U>
std::ostream &operator<<(std::ostream &stream, const std::pair<T, U> &x) {
  return stream << "(" << x.first << "," << x.second << ")";
}
// Prints an engaged optional as "Value(x)" and an empty one as "Invalid".
// Takes the optional by const reference so printing does not copy it.
template <typename T>
std::ostream &operator<<(std::ostream &stream, const std::optional<T> &x) {
  if (!x.has_value())
    return stream << "Invalid";
  return stream << "Value(" << *x << ")";
}
// Returns an lvalue reference to its argument, even when given an rvalue.
// The reference is only valid while the argument itself is alive.
template <class T> T &unmove(T &&t) { return static_cast<T &>(t); }
// Splits xs into the longest prefix whose characters all satisfy f and the
// remainder, returned as (prefix, remainder).
std::pair<std::string_view, std::string_view>
span(std::string_view xs, std::function<bool(char)> f) {
  if (xs.empty())
    return {std::string_view(""), std::string_view("")};
  size_t split = 0;
  while (split < xs.size() && f(xs[split]))
    ++split;
  return {xs.substr(0, split), xs.substr(split)};
}
// Result of running a parser: the parsed value paired with the unconsumed
// input, or an empty optional on failure.
template <typename T>
using ParserRet = std::optional<std::pair<T, std::string_view>>;

// The callable type wrapped by Parser.
template <typename T>
using ParserT = std::function<ParserRet<T>(std::string_view)>;

// A parser producing T; calling it runs the wrapped function on the input.
template <typename T> struct Parser {
  explicit Parser(ParserT<T> p) : parser(std::move(p)) {}

  ParserRet<T> operator()(std::string_view s) const { return parser(s); }
  // ParserRet<T> operator()(std::string s) { return parser(s); }

  const ParserT<T> parser;
};
template <typename T, typename U> | |
Parser<U> fmap(Parser<T> parser, std::function<U(T)> f) { | |
return Parser<U>([=](auto s) -> ParserRet<U> { | |
if (auto x = parser(s)) | |
return std::make_pair(f(x->first), x->second); | |
return {}; | |
}); | |
} | |
template <typename T, typename U> | |
Parser<T> operator<(T value, Parser<U> parser) { | |
return fmap<U, T>(parser, [value](U UNUSED(_)) { return value; }); | |
} | |
template <typename T, typename U> | |
Parser<U> operator>(Parser<T> parser, U value) { | |
return fmap<T, U>(parser, [value](T UNUSED(_)) { return value; }); | |
} | |
template <typename T, typename U> | |
Parser<U> operator>>(Parser<T> parserA, Parser<U> parserB) { | |
return Parser<U>([=](std::string_view s) -> ParserRet<U> { | |
if (auto resA = parserA(s)) | |
return parserB(resA->second); | |
return {}; | |
}); | |
} | |
template <typename T, typename U> | |
Parser<T> operator<<(Parser<T> parserA, Parser<U> parserB) { | |
return Parser<T>([=](std::string_view s) -> ParserRet<T> { | |
auto resA = parserA(s); | |
if (!resA) | |
return {}; | |
auto resB = parserB(resA->second); | |
if (!resB) | |
return {}; | |
return std::make_pair(resA->first, resB->second); | |
}); | |
} | |
// Parser that succeeds only when the input starts with c, consuming it.
Parser<char> charP(char c) {
  return Parser<char>([c](std::string_view input) -> ParserRet<char> {
    if (input.empty() || input.front() != c)
      return std::nullopt;
    return std::make_pair(c, input.substr(1));
  });
}
// Parser that succeeds only when the input starts with s_input, consuming it.
//
// The expected text is moved into the lambda. Capturing it by reference would
// leave the returned parser holding a reference to this function's parameter,
// which is destroyed as soon as stringP returns.
Parser<std::string> stringP(std::string s_input) {
  return Parser<std::string>(
      [expected = std::move(s_input)](
          std::string_view s) -> ParserRet<std::string> {
        if (!s.starts_with(expected))
          return std::nullopt;
        return std::make_pair(expected, s.substr(expected.length()));
      });
}
// Parser that consumes the longest prefix whose characters satisfy pred.
// It never fails; the prefix may be empty.
Parser<std::string_view> spanP(std::function<bool(char)> pred) {
  return Parser<std::string_view>(
      [pred](std::string_view input) -> ParserRet<std::string_view> {
        auto [matched, rest] = span(input, pred);
        return std::make_pair(matched, rest);
      });
}
// Parser for a run of decimal digits, converted to int.
// Fails when there are no leading digits or the value does not fit in int.
Parser<int> numP = Parser<int>([](std::string_view s) -> ParserRet<int> {
  // <cctype> functions require a value representable as unsigned char.
  auto [num, rem] = span(s, [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  });
  try {
    return std::make_pair(std::stoi(std::string(num)), rem);
  } catch (const std::invalid_argument &) {
    return {}; // no digits at all
  } catch (const std::out_of_range &) {
    return {}; // too many digits for int
  }
});
Parser<std::string_view> whitespace = spanP(isspace); | |
// Parser for a double-quoted string; yields the text between the quotes.
// Escape sequences are not interpreted.
Parser<std::string_view> string_literal =
    charP('"') >> spanP([](char c) { return c != '"'; }) << charP('"');
template <typename T> | |
Parser<std::vector<T>> many(Parser<T> parser) { | |
return Parser<std::vector<T>>( | |
[parser](std::string_view s) -> ParserRet<std::vector<T>> { | |
std::vector<T> out = {}; | |
while (auto x = parser(s)) { | |
out.push_back(x->first); | |
s = x->second; | |
} | |
return std::make_pair(out, s); | |
}); | |
} | |
template <typename T, typename U, typename V> | |
Parser<V> liftA2(std::function<V(T, U)> f, Parser<T> parserA, Parser<U> parserB) { | |
return Parser<V>([=](std::string_view s) -> ParserRet<V> { | |
auto res_a = parserA(s); | |
if (!res_a) | |
return {}; | |
auto res_b = parserB(res_a->second); | |
if (!res_b) | |
return {}; | |
return std::make_pair(f(res_a->first, res_b->first), res_b->second); | |
}); | |
} | |
// Parser that consumes nothing and always yields x.
template <typename T> Parser<T> pure(T x) {
  return Parser<T>([x](std::string_view input) {
    return std::make_pair(x, input);
  });
}
template <typename T, typename U> | |
Parser<std::vector<T>> delimP(Parser<T> item, Parser<U> sep) { | |
return Parser<std::vector<T>>( | |
[=](std::string_view s) -> ParserRet<std::vector<T>> const{ | |
auto res_1 = item(s); | |
if (!res_1) return std::make_pair(std::vector<T>({}), s); | |
auto res_2 = many(sep >> item)(res_1->second); | |
if (!res_2) return std::make_pair(std::vector<T>({res_1->first}), res_1->second); | |
std::vector<T>& out = res_2->first; | |
out.insert(out.begin(), res_1->first); | |
return std::make_pair(out, res_2->second); | |
}); | |
} | |
template <typename T> | |
Parser<T> operator|(Parser<T> parserA, Parser<T> parserB) { | |
return Parser<T>([=](std::string_view s) -> ParserRet<T> const { | |
if (auto x = parserA(s)) | |
return x; | |
if (auto x = parserB(s)) | |
return x; | |
return {}; | |
}); | |
} | |
// Alternative for the JSON `null` literal; carries no data.
struct _JsonNull {};
// Alternative for a JSON boolean.
struct _JsonBool {
  bool b;
};
// Alternative for a JSON number, stored as int.
struct _JsonNum {
  int n;
};
// Alternative for a JSON string.
struct _JsonString {
  std::string s;
};
// Alternative for a JSON array. The element type is spelled out in full
// because JsonValue is only declared below.
struct _JsonArray {
  std::vector<
      std::variant<_JsonNull, _JsonBool, _JsonNum, _JsonString, _JsonArray>>
      xs;
  // god I miss sum types
};
// Any JSON value supported here (there is no object alternative).
using JsonValue =
    std::variant<_JsonNull, _JsonBool, _JsonNum, _JsonString, _JsonArray>;
//I was gonna make a _JsonObject but realized that was quite hard | |
//struct _JsonObject { | |
//std::map<std::string, JsonValue> m; | |
//}; | |
// Helper "constructors" that wrap each alternative in a JsonValue, so that
// call sites get a JsonValue rather than the bare alternative type.
JsonValue JsonNull = _JsonNull{};
JsonValue JsonBool(bool b) { return JsonValue{_JsonBool{b}}; }
JsonValue JsonNum(int n) { return JsonValue{_JsonNum{n}}; }
JsonValue JsonString(std::string_view s) {
  return JsonValue{_JsonString{std::string(s)}};
}
JsonValue JsonArray(std::vector<JsonValue> xs) {
  return JsonValue{_JsonArray{xs}};
}
// Combines several callables into one overload set by inheriting every
// operator(); used as the visitor passed to std::visit.
template <class... Ts> struct overload : Ts... { using Ts::operator()...; };
std::ostream &operator<<(std::ostream &stream, JsonValue val) { | |
std::visit( | |
(overload{[&](const _JsonNull &) { stream << "JsonNull"; }, | |
[&](const _JsonBool &b) { | |
stream << "JsonBool(" << (b.b ? "true" : "false") << ")"; | |
}, | |
[&](const _JsonNum &n) { stream << "JsonNum(" << n.n << ")"; }, | |
[&](const _JsonString &s) { | |
stream << "JsonString(\"" << s.s << "\")"; | |
}, | |
[&](const _JsonArray &xs) { | |
stream << "JsonArray(" << xs.xs << ")"; | |
}}), | |
val); | |
return stream; | |
} | |
// Prints every argument on its own line, followed by a blank line.
// All arguments must have the same type (they form one initializer list).
template <class... Args> void print(Args... args) {
  for (const auto &item : {args...})
    std::cout << item << '\n';
  std::cout << std::endl;
}
// Some of these are macros rather than constants because, as constants, the
// parsed values were observed to change between uses.
// NOTE(review): that symptom is consistent with a parser holding a dangling
// reference to the text it was built from; verify how stringP captures its
// argument before turning these back into constants.

// Parses `null` into JsonNull.
#define jsonNull (JsonNull < stringP("null"))
// Parses `true` / `false` into JsonBool.
#define jsonBool ((stringP("true") > JsonBool(true)) | (JsonBool(false) < stringP("false")))
// Parses a run of digits into JsonNum.
const Parser<JsonValue> jsonNum =
    fmap<int, JsonValue>(numP, [](int x) { return JsonNum(x); });
// Parses a double-quoted string into JsonString.
const Parser<JsonValue> jsonString = fmap<std::string_view, JsonValue>(
    charP('"') >> spanP([](char c) { return c != '"'; }) << charP('"'),
    [](const std::string_view &s) { return JsonString(s); });
// Any non-array JSON value.
#define _jsonValue (JsonNull < stringP("null")) \
    | jsonString \
    | ((stringP("true") > JsonBool(true)) | (JsonBool(false) < stringP("false"))) \
    | jsonNum
// A parser cannot refer to itself while being defined, so nesting is unrolled
// by hand: _jsonArray is an array of non-array values, and jsonArray accepts
// those values plus one level of _jsonArray.
#define _jsonArray (fmap<std::vector<JsonValue>,JsonValue>(charP('[') >> whitespace >> delimP(_jsonValue << whitespace, charP(',') << whitespace) << charP(']'), \
    [](std::vector<JsonValue> xs){ return JsonArray(xs); }))
#define jsonArray (fmap<std::vector<JsonValue>,JsonValue>(charP('[') >> delimP((_jsonValue | _jsonArray) << whitespace, charP(',') << whitespace) << charP(']'), \
    [](std::vector<JsonValue> xs){ return JsonArray(xs); }))
// Runs each JSON parser on a sample input and prints the result.
void jsonTests() {
  const auto null_result = jsonNull("null");
  print(null_result);

  const auto true_result = jsonBool("true");
  const auto false_result = jsonBool("false");
  print(true_result, false_result);

  const auto num_result = jsonNum("123");
  print(num_result);

  const auto string_result = jsonString("\"hello world\"");
  print(string_result);

  const auto array_result =
      jsonArray("[null, true, [1,2,3], false, 1,2, \"hello\"]");
  print(array_result);
}
int main() { | |
jsonTests(); | |
// so I dont have to type all of this every showcase | |
auto a_parse = charP('a'); | |
print(a_parse("aaasd"), a_parse("baasd")); | |
auto asd_parse = stringP("asd"); | |
print(asd_parse("asdbas"), asd_parse("basddsd")); | |
print(numP("123asd"), numP("asd")); | |
print(string_literal("\"asd\""), string_literal("asd")); | |
auto alternate = stringP("dsa") | stringP("asd"); | |
print(alternate("dsa"), alternate("asd")); | |
auto replace = stringP("asd") > "foo"; | |
print(replace("asd"), replace("dsa")); | |
auto many_dsa = many(stringP("dsa")); | |
print(many_dsa("dsadsadsa"), many_dsa("asd")); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE LambdaCase, TupleSections #-} | |
module Main where | |
import Control.Applicative | |
import Control.Monad | |
import Data.Char | |
import qualified Data.Map as M | |
import Text.Printf | |
import Data.Tuple | |
-- | A parsed JSON value.
data JsonValue
  = JsonNull                             -- ^ The @null@ literal.
  | JsonBool Bool                        -- ^ @true@ or @false@.
  | JsonNumber Int                       -- ^ An integer number.
  | JsonString String                    -- ^ Text of a string literal, without quotes.
  | JsonArray [JsonValue]                -- ^ An ordered list of values.
  | JsonObject (M.Map String JsonValue)  -- ^ A mapping from keys to values.
  deriving (Show, Eq)
-- | A parser producing an @a@: given the input it returns
-- @Just (remainingInput, value)@ on success and @Nothing@ on failure.
newtype Parser a = Parser {
    runParser :: String -> Maybe (String, a)
  }
-- | Mapping transforms the parsed value and leaves the remaining input alone.
instance Functor Parser where
  fmap f (Parser run) = Parser $ \input -> fmap (fmap f) (run input)
-- | 'pure' consumes nothing; '<*>' runs the function parser, then the
-- argument parser on what is left, and applies the one to the other.
instance Applicative Parser where
  pure x = Parser $ \input -> Just (input, x)
  Parser pf <*> Parser pa = Parser $ \input ->
    case pf input of
      Nothing -> Nothing
      Just (rest, f) ->
        case pa rest of
          Nothing -> Nothing
          Just (rest', a) -> Just (rest', f a)
-- | 'empty' always fails; '<|>' tries the left parser and falls back to the
-- right one on the same input.
instance Alternative Parser where
  empty = Parser (const Nothing)
  Parser p1 <|> Parser p2 = Parser $ \input ->
    case p1 input of
      Nothing -> p2 input
      success -> success
-- | Succeeds only when the input starts with the given character.
charParser :: Char -> Parser Char
charParser expected = Parser go
  where
    go (c : rest)
      | c == expected = Just (rest, expected)
    go _ = Nothing
-- | Matches the exact given text, one 'charParser' per character.
stringParser :: String -> Parser String
stringParser = sequenceA . map charParser
-- | Consumes the longest prefix whose characters satisfy the predicate.
-- Never fails; the prefix may be empty.
spanParser :: (Char -> Bool) -> Parser String
spanParser f = Parser $ \input ->
  let (matched, rest) = span f input
   in Just (rest, matched)
-- | Turns a successful parse with an empty result list into a failure.
failOnNull :: Parser [a] -> Parser [a]
failOnNull (Parser p) = Parser $ \input ->
  case p input of
    Just (_, []) -> Nothing
    result -> result
-- | A (possibly empty) run of whitespace.
whiteSpace :: Parser String
whiteSpace = spanParser (\c -> isSpace c)
-- | A double-quoted string; yields the text between the quotes.
-- Escape sequences are not interpreted.
stringLiteral :: Parser String
stringLiteral = quote *> spanParser (/= '"') <* quote
  where
    quote = charParser '"'
-- | Zero or more elements separated by @sep@, collected in order.
sepByParser :: Parser b -> Parser a -> Parser [b]
sepByParser element sep = oneOrMore <|> pure []
  where
    oneOrMore = liftA2 (:) element (many (sep *> element))
-- | Parses the literals @true@ and @false@.
jsonBool :: Parser JsonValue
jsonBool =
  JsonBool True <$ stringParser "true"
    <|> JsonBool False <$ stringParser "false"
-- | Parses the literal @null@.
jsonNull :: Parser JsonValue
jsonNull = fmap (const JsonNull) (stringParser "null")
-- | Parses a non-empty run of decimal digits as an 'Int'.
jsonNumber :: Parser JsonValue
jsonNumber = fmap (JsonNumber . read) digits
  where
    digits = failOnNull (spanParser isDigit)
-- | Parses a double-quoted string literal.
jsonString :: Parser JsonValue
jsonString = fmap JsonString stringLiteral
-- | Parses @[@, comma-separated values, @]@, with optional whitespace around
-- every token.
jsonArray :: Parser JsonValue
jsonArray = JsonArray <$> (open *> elements <* close)
  where
    open = charParser '[' *> whiteSpace
    close = whiteSpace <* charParser ']'
    comma = whiteSpace *> charParser ',' <* whiteSpace
    elements = jsonValue `sepByParser` comma
-- | Parses @{@, comma-separated @"key": value@ pairs, @}@, with optional
-- whitespace around every token. The per-pair singleton maps are merged with
-- 'mconcat'.
jsonObject :: Parser JsonValue
jsonObject = JsonObject . mconcat <$> (open *> entries <* close)
  where
    open = charParser '{' *> whiteSpace
    close = whiteSpace <* charParser '}'
    comma = whiteSpace *> charParser ',' <* whiteSpace
    colon = whiteSpace *> charParser ':' <* whiteSpace
    entries = pair `sepByParser` comma
    pair = M.singleton <$> stringLiteral <* colon <*> jsonValue
-- | Parses any JSON value by trying each value parser in turn.
jsonValue :: Parser JsonValue
jsonValue =
  jsonBool
    <|> jsonNull
    <|> jsonNumber
    <|> jsonString
    <|> jsonArray
    <|> jsonObject
-- | Parses two sample files and prints selected fields of each result.
main :: IO ()
main = do
  --print . runParser jsonValue =<< readFile "/home/truff/.local/src/sandbox/haskell/large.json"
  data_ <- readFile "/home/truff/.local/src/sandbox/haskell/input.json"
  showcase data_ $ runParser jsonValue data_
  wrongData <- readFile "/home/truff/.local/src/sandbox/haskell/wrong-input.json"
  showcase wrongData $ runParser jsonValue wrongData
  where
    -- Prints the interesting keys of a successfully parsed top-level object,
    -- or a failure banner for anything else.
    showcase :: String -> Maybe (String, JsonValue) -> IO ()
    showcase content (Just (remaining, JsonObject x)) = do
      putStrLn "-------------"
      putStrLn $ "Original File" ++ content
      putStrLn "-------------"
      putStrLn $ "Null Values: " ++ show (M.lookup "nullval" x)
      putStrLn $ "Bools: " ++ show (M.lookup "bool true" x) ++ " " ++ show (M.lookup "bool false" x)
      putStrLn $ "Strings: " ++ show (M.lookup "string" x)
      putStrLn $ "Arrays: " ++ show (M.lookup "arrays" x)
      -- Show the "objects" key and the unparsed remainder rather than
      -- repeating the "arrays" lookup.
      putStrLn $ "Objects: " ++ show (M.lookup "objects" x)
      putStrLn $ "Remaining: " ++ show remaining
    showcase content _ = do
      putStrLn "Parse failure"
      putStrLn "-------------"
      putStrLn $ "Original File" ++ content
      putStrLn "-------------"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
use std::fs; | |
/// A parsed JSON value.
///
/// `Clone` and `PartialEq` are derived so values can be copied and compared.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
    /// The `null` literal.
    JsonNull,
    /// `true` or `false`.
    JsonBool(bool),
    /// An integer stored as `i32`.
    JsonNumber(i32),
    /// The text of a string literal, without the surrounding quotes.
    JsonString(String),
    /// An ordered list of values.
    JsonArray(Vec<JsonValue>),
    /// A mapping from keys to values.
    JsonObject(HashMap<String, JsonValue>),
}
// Not exactly like the `Parser` type in the Haskell example; here it is just
// an alias. Parsers return `Some((remaining_input, value))` on success and
// `None` on failure.
type ParseResult<T> = Option<(String, T)>;
fn char_parser(c: char, source: String) -> ParseResult<char> { | |
let mut c_str = source.chars(); | |
if Some(c) == c_str.next() { | |
Some((c_str.collect(), c)) | |
} else { | |
None | |
} | |
} | |
//Keep parsing until failure | |
fn many_parse<T, F>(parser: F, mut source: String) -> ParseResult<Vec<T>> | |
where | |
F: Fn(&String) -> ParseResult<T>, | |
{ | |
let mut out: Vec<T> = vec![]; | |
loop { | |
match parser(&source) { | |
Some((remain, output)) => { | |
source = remain; | |
out.push(output); | |
} | |
None => return Some((source, out)), | |
} | |
} | |
} | |
//Repeatedly uses all parsers in a Vector until it finds a functioning one | |
fn alternate_parse<T>(fa: Vec<fn(String) -> ParseResult<T>>, source: String) -> ParseResult<T> { | |
for parser in fa.iter() { | |
match parser(source.clone()) { | |
Some(x) => return Some(x), | |
None => (), | |
} | |
} | |
None | |
} | |
//Split a list into two from a predicate | |
fn span_parser<F>(f: F, source: String) -> ParseResult<String> | |
where | |
F: Fn(&char) -> bool, | |
{ | |
if source.is_empty() { | |
return Some((source, "".to_string())); | |
} | |
let mut split_val: i32 = -1; | |
for (index, a) in source.chars().enumerate() { | |
if !f(&a) { | |
split_val = index as i32; | |
break; | |
} | |
} | |
if split_val as usize >= source.len() { | |
return Some(("".to_string(), source)); | |
} | |
let (a, b) = source.split_at(split_val as usize); | |
Some((b.to_string(), a.to_string())) | |
} | |
fn delim_parser(l: char, source: String, r: char) -> ParseResult<String> { | |
let (remain_1, _) = char_parser(l, source)?; | |
let (remain_2, out) = span_parser(|x| *x != r, remain_1).unwrap(); | |
let (remain_3, _) = char_parser(r, remain_2)?; | |
Some((remain_3, out)) | |
} | |
fn string_parser(s: String, source: String) -> ParseResult<String> { | |
if s.is_empty() { | |
return None; | |
} | |
let mut source_c = source.chars(); | |
let mut out = String::from(""); | |
for i in s.chars() { | |
if Some(i) == source_c.next() { | |
out.push(i) | |
} else { | |
return None; | |
} | |
} | |
Some((source_c.collect(), out)) | |
} | |
fn json_null(source: String) -> Option<(String, JsonValue)> { | |
string_parser("null".to_string(), source).map(|(a, _)| (a, JsonValue::JsonNull)) | |
} | |
fn json_bool(source: String) -> ParseResult<JsonValue> { | |
match ( | |
string_parser("true".to_string(), source.clone()), | |
string_parser("false".to_string(), source), | |
) { | |
(None, Some((a, _))) => Some((a, JsonValue::JsonBool(false))), | |
(Some((a, _)), None) => Some((a, JsonValue::JsonBool(true))), | |
_ => None, | |
} | |
} | |
fn json_num(source: String) -> ParseResult<JsonValue> { | |
let (a, b) = span_parser(|x| x.is_digit(10), source).unwrap(); | |
match b.parse::<i32>() { | |
Ok(x) => Some((a, JsonValue::JsonNumber(x))), | |
Err(_) => None, | |
} | |
} | |
fn string_literal(source: String) -> ParseResult<String> { | |
delim_parser('"', source, '"') | |
} | |
fn json_string(source: String) -> ParseResult<JsonValue> { | |
delim_parser('"', source, '"').map(|(a, b)| (a, JsonValue::JsonString(b))) | |
} | |
fn json_array(source: String) -> ParseResult<JsonValue> { | |
let (res_1, _) = char_parser('[', source)?; | |
let (res_1a,_) = span_parser(|x| x.is_whitespace(), res_1)?; | |
let (res_2, out) = many_parse(|str| { | |
let(res_, _) = span_parser(|x| x.is_whitespace(), str.to_string())?; | |
let(res, out) = json_value(res_)?; | |
let(res_a, _) = span_parser(|x| x.is_whitespace(), res)?; | |
match char_parser(',', res_a.clone()) { | |
Some((res_b, _)) => Some((res_b, out)), | |
None => Some((res_a, out)) | |
} | |
} | |
, res_1a)?; | |
let (res_2a,_) = span_parser(|x| x.is_whitespace(), res_2)?; | |
let (res_3,_) = char_parser(']', res_2a)?; | |
Some((res_3, JsonValue::JsonArray(out))) | |
} | |
fn json_object(source: String) -> ParseResult<JsonValue> { | |
let (res_1, _) = char_parser('{', source)?; | |
let (res_1a,_) = span_parser(|x| x.is_whitespace(), res_1)?; | |
let (res_2, out) = many_parse(|str| { | |
let(res_, _) = span_parser(|x| x.is_whitespace(), str.to_string())?; | |
let(res, out_1) = string_literal(res_)?; | |
let(res_a, _) = span_parser(|x| x.is_whitespace(), res)?; | |
let(res_a1,_) = char_parser(':', res_a)?; | |
let(res_a2, _) = span_parser(|x| x.is_whitespace(), res_a1)?; | |
let(res_b, out_2) = json_value(res_a2)?; | |
let(res_b1, _) = span_parser(|x| x.is_whitespace(), res_b)?; | |
match char_parser(',', res_b1.clone()) { | |
Some((res_b2,_)) => Some((res_b2, (out_1, out_2))), | |
None => Some((res_b1, (out_1, out_2))) | |
} | |
} | |
, res_1a)?; | |
let (res_2a,_) = span_parser(|x| x.is_whitespace(), res_2)?; | |
let (res_3,_) = char_parser('}', res_2a)?; | |
Some((res_3, | |
JsonValue::JsonObject( | |
out.into_iter().collect::<HashMap<String, JsonValue>>() | |
) | |
)) | |
} | |
fn json_value(source: String) -> ParseResult<JsonValue> { | |
alternate_parse(vec![json_null, json_bool, json_num, json_string, json_array, json_object], source) | |
} | |
fn showcase(data: String, jsonvalue: Option<(String,JsonValue)>) -> (){ | |
match jsonvalue { | |
Some((a,JsonValue::JsonObject(x))) => { | |
println!("------------"); | |
println!("Original file {}", data); | |
println!("------------"); | |
println!("Null Values: {:?}", x.get("nullval") ); | |
println!("bools: {:?}, {:?}", x.get("bool true"), x.get("bool false") ); | |
println!("Strings: {:?}", x.get("string") ); | |
println!("Array: {:?}", x.get("arrays")); | |
println!("Objects: {:?}", x.get("objects")); | |
println!("remaining parse: {:?}", a); | |
}, | |
_ => { | |
println!("\nWrong json:"); | |
println!("------------"); | |
println!("Original file {}", data); | |
println!("------------"); | |
println!("ParseFailure")} | |
} | |
} | |
fn main() -> () { | |
//println!("{:?}", json_value(fs::read_to_string("large.json").unwrap())); | |
let data = fs::read_to_string("input.json").unwrap(); | |
let jsonvalue = json_value(data.clone()); | |
showcase(data, jsonvalue); | |
let data = fs::read_to_string("wrong-input.json").unwrap(); | |
let jsonvalue = json_value(data.clone()); | |
showcase(data, jsonvalue); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
THD