-
-
Save deliro/ccd8bdb2aab78a229fe82e64e6549301 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use serde::Serialize; | |
use std::collections::HashMap; | |
use std::io; | |
use std::process::ExitCode; | |
/// Column separator used by a table.
///
/// The separator is chosen once from the header line (see [`LineSep::detect`]) and then
/// applied to every row through [`LineSep::split`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum LineSep {
    /// Columns are delimited by one specific character (`|`, `,` or a tab).
    Char(char),
    /// Columns are delimited by runs of whitespace.
    Spaces,
}

impl LineSep {
    /// Characters recognised as explicit column delimiters.
    const DELIMITERS: [char; 3] = ['|', ',', '\t'];

    /// Picks the separator for a table by inspecting its header line.
    ///
    /// The first occurrence of `|`, `,` or a tab in `header` wins; when none of them is
    /// present the table is treated as whitespace separated.
    fn detect(header: &str) -> Self {
        header
            .chars()
            .find(|ch| Self::DELIMITERS.contains(ch))
            .map_or(Self::Spaces, Self::Char)
    }

    /// Splits `line` into trimmed, owned cells.
    ///
    /// For [`LineSep::Char`], pieces that are completely empty (such as those produced by
    /// leading delimiter characters or doubled delimiters) are dropped *before* trimming,
    /// so a piece consisting only of spaces survives as an empty cell.
    /// For [`LineSep::Spaces`], the line is split on runs of whitespace.
    fn split(&self, line: &str) -> Vec<String> {
        match *self {
            Self::Char(delimiter) => line
                .split(delimiter)
                .filter(|piece| !piece.is_empty())
                .map(|piece| piece.trim().to_string())
                .collect(),
            Self::Spaces => line.split_whitespace().map(str::to_string).collect(),
        }
    }
}
#[derive(Serialize, Debug, PartialEq)] | |
struct Node { | |
values: HashMap<String, String>, | |
children: Vec<Node>, | |
} | |
impl Node { | |
fn from(header: &Vec<String>, line: &String, sep: &LineSep) -> Self { | |
let values = sep.split(line.as_str()); | |
let map = header | |
.into_iter() | |
.zip(values) | |
.map(|(k, v)| (k.to_string(), v)) | |
.collect::<HashMap<String, String>>(); | |
Self { | |
values: map, | |
children: vec![], | |
} | |
} | |
#[cfg(test)] | |
fn to_tsv_line(&self, header: &Vec<String>) -> String { | |
header | |
.into_iter() | |
.map(|v| { | |
self.values | |
.get(v.as_str()) | |
.and_then(|v| Some(v.to_owned())) | |
.unwrap_or("".to_string()) | |
}) | |
.collect::<Vec<_>>() | |
.join("\t") | |
} | |
} | |
/// Returns the indentation depth of `line`: the number of leading whitespace characters.
///
/// Every whitespace character counts as one unit, so a tab and a space weigh the same.
/// A line made only of whitespace yields its full character count.
fn get_level(line: &str) -> usize {
    line.chars().take_while(|ch| ch.is_whitespace()).count()
}
fn parse_body( | |
header: &Vec<String>, | |
lines: &mut Vec<String>, | |
level: usize, | |
sep: &LineSep, | |
) -> Option<Vec<Node>> { | |
let mut result: Vec<Node> = Vec::new(); | |
while lines.len() != 0 { | |
let line = lines.last().unwrap(); | |
let line_level = get_level(line); | |
if line_level == level { | |
result.push(Node::from(header, line, &sep)); | |
lines.pop(); | |
} else if line_level > level { | |
let children = parse_body(header, lines, line_level, &sep)?; | |
result.last_mut()?.children = children; | |
} else { | |
break; | |
} | |
} | |
Some(result) | |
} | |
fn parse(inp: &str) -> Option<Vec<Node>> { | |
let mut lines = inp.lines().filter_map(|v| match v.trim().len() { | |
0 => None, | |
_ => Some(v), | |
}); | |
let header_line = lines.next()?; | |
let sep = LineSep::detect(header_line); | |
let header = sep.split(header_line); | |
let mut body = lines.map(|v| v.to_owned()).rev().collect::<Vec<String>>(); | |
let first_line = &body.last()?.to_string(); | |
let initial_level = get_level(first_line); | |
if (&body).into_iter().any(|x| get_level(x) < initial_level) { | |
None | |
} else { | |
parse_body(&header, &mut body, initial_level, &sep) | |
} | |
} | |
fn main() -> ExitCode { | |
let inp = io::stdin() | |
.lines() | |
.into_iter() | |
.map(|v| v.expect("error while reading stdin")) | |
.collect::<Vec<String>>() | |
.join("\n"); | |
match parse(inp.as_str()) { | |
Some(v) => { | |
let s = serde_json::to_string(&v).expect("error making a json"); | |
println!("{s}"); | |
ExitCode::SUCCESS | |
} | |
None => { | |
eprintln!("parsing error"); | |
ExitCode::FAILURE | |
} | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
use std::collections::HashSet; | |
use std::iter::once; | |
fn all_keys(vals: &Vec<Node>) -> Vec<String> { | |
vals.into_iter() | |
.map(|v| v.values.clone().into_keys().chain(all_keys(&v.children))) | |
.flatten() | |
.collect::<HashSet<_>>() | |
.into_iter() | |
.collect() | |
} | |
fn inner_pretty_format(vals: &Vec<Node>, header: &Vec<String>, level: usize) -> Vec<String> { | |
vals.into_iter() | |
.map(|v| { | |
let line = format!("{}{}", "\t".repeat(level), v.to_tsv_line(header)); | |
once(line).chain(inner_pretty_format(&v.children, header, level + 1)) | |
}) | |
.flatten() | |
.collect() | |
} | |
fn pretty_format(vals: &Vec<Node>) -> String { | |
let mut header = all_keys(&vals); | |
header.sort(); | |
let header_line = header.join("\t"); | |
once(header_line) | |
.chain(inner_pretty_format(vals, &header, 0)) | |
.collect::<Vec<_>>() | |
.join("\n") | |
} | |
// Parses the table in `x` and asserts that `pretty_format` of the result equals `expected`
// (with leading whitespace trimmed from `expected`). `parse(x).unwrap()` makes the test
// fail if parsing returns `None`.
// NOTE(review): the parser derives nesting from leading whitespace, so the exact whitespace
// inside both literals is significant. The flattened indentation and the trailing ` | |`
// on each line look like page-extraction artifacts; restore the literals from the upstream
// source before relying on this test.
#[test] | |
fn weird_case() { | |
let x = " | |
key value a-col | |
hello world one-value | |
foo bar | |
x y | |
dedent value | |
q w"; | |
let expected = " | |
a-col key value | |
one-value hello world | |
foo bar | |
x y | |
dedent value | |
q w" | |
.trim_start(); | |
let v = parse(x).unwrap(); | |
assert_eq!(pretty_format(&v), expected) | |
} | |
// Parses a three-column table (`col1`, `col2`, `col3`) with nested rows and asserts that
// `pretty_format` reproduces `expected` (leading whitespace trimmed).
// The last row, `root2`, has a value for the first column only; `to_tsv_line` renders the
// missing columns as empty cells, which presumably accounts for the trailing whitespace
// after `root2` in `expected`.
// NOTE(review): judging by the test name the literals use tab characters for separators and
// indentation. The flattened whitespace and the trailing ` | |` on each line look like
// page-extraction artifacts; restore the literals from the upstream source.
#[test] | |
fn tabs() { | |
let x = " | |
col1 col2 col3 | |
root q w | |
child1 e r | |
subchild1 hello world | |
child2 [ ] | |
root2"; | |
let expected = " | |
col1 col2 col3 | |
root q w | |
child1 e r | |
subchild1 hello world | |
child2 [ ] | |
root2 " | |
.trim_start(); | |
let v = parse(x).unwrap(); | |
assert_eq!(pretty_format(&v), expected); | |
} | |
// Parses a flat table with columns `Name`, `Age`, `Address` and asserts the
// `pretty_format` output. `pretty_format` sorts the column names, which is why `expected`
// lists the columns as `Address`, `Age`, `Name`.
// The input literal ends with a line break before the closing quote; `parse` ignores blank
// lines.
// NOTE(review): cells such as `Bessy the Cow` contain spaces, so the columns are presumably
// separated by tab characters in the real literals. The flattened whitespace and the
// trailing ` | |` on each line look like page-extraction artifacts; restore the literals
// from the upstream source.
#[test] | |
fn plain_tsv() { | |
let x = " | |
Name Age Address | |
Paul 23 1115 W Franklin | |
Bessy the Cow 5 Big Farm Way | |
Zeke 45 W Main St | |
" | |
.trim_start(); | |
let expected = " | |
Address Age Name | |
1115 W Franklin 23 Paul | |
Big Farm Way 5 Bessy the Cow | |
W Main St 45 Zeke" | |
.trim_start(); | |
let v = parse(x).unwrap(); | |
assert_eq!(pretty_format(&v), expected); | |
} | |
// Parses a table whose header contains `|`, so `LineSep::detect` selects the pipe as the
// column separator, and asserts the `pretty_format` output for it.
// NOTE(review): `expected` is presumably tab separated, since `pretty_format` joins cells
// with tabs. The flattened whitespace and the trailing ` | |` on each line look like
// page-extraction artifacts (only the first `|` in each input row belongs to the data);
// restore the literals from the upstream source.
#[test] | |
fn pipe_separated() { | |
let x = " | |
key | value | |
foo | bar | |
x | y" | |
.trim_start(); | |
let expected = " | |
key value | |
foo bar | |
x y" | |
.trim_start(); | |
let v = parse(x).unwrap(); | |
assert_eq!(pretty_format(&v), expected); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment