Skip to content

Instantly share code, notes, and snippets.

@deliro
Last active May 15, 2023 22:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deliro/ccd8bdb2aab78a229fe82e64e6549301 to your computer and use it in GitHub Desktop.
Save deliro/ccd8bdb2aab78a229fe82e64e6549301 to your computer and use it in GitHub Desktop.
use serde::Serialize;
use std::collections::HashMap;
use std::io;
use std::process::ExitCode;
/// Column separator shared by every line of a table.
///
/// The separator is chosen from the header line with [`LineSep::detect`] and
/// then used to split the header and all body lines.
#[derive(Clone, Debug, PartialEq, Eq)]
enum LineSep {
    /// A single delimiter character.
    Char(char),
    /// Columns are separated by runs of whitespace.
    Spaces,
}

impl LineSep {
    /// Delimiter characters that [`LineSep::detect`] looks for.
    const DELIMITERS: [char; 3] = ['|', ',', '\t'];

    /// Chooses the separator from the header line.
    ///
    /// The first `|`, `,` or tab character that occurs in `header` becomes the
    /// delimiter. When none of them occurs, [`LineSep::Spaces`] is returned.
    fn detect(header: &str) -> Self {
        header
            .chars()
            .find(|ch| Self::DELIMITERS.contains(ch))
            .map_or(Self::Spaces, Self::Char)
    }

    /// Splits `line` into owned cell values.
    ///
    /// * [`LineSep::Char`]: splits on the delimiter, drops pieces that are
    ///   completely empty and trims the remaining ones.
    /// * [`LineSep::Spaces`]: splits on runs of whitespace.
    fn split(&self, line: &str) -> Vec<String> {
        match self {
            Self::Char(delimiter) => line
                .split(*delimiter)
                // The emptiness check runs before trimming: pieces produced by
                // leading or repeated delimiters are removed, while a
                // whitespace-only piece is kept and becomes an empty string.
                .filter(|piece| !piece.is_empty())
                .map(|piece| piece.trim().to_string())
                .collect(),
            Self::Spaces => line.split_whitespace().map(String::from).collect(),
        }
    }
}
/// One table row together with the rows nested beneath it.
#[derive(Serialize, Debug, PartialEq)]
struct Node {
    /// Cell values of the row, keyed by column name.
    values: HashMap<String, String>,
    /// Rows nested under this row.
    children: Vec<Node>,
}

impl Node {
    /// Builds a childless node from a single line.
    ///
    /// `line` is split with `sep` and the resulting values are paired
    /// positionally with the column names in `header`. Pairing stops at the
    /// shorter of the two sequences: surplus values are discarded and columns
    /// without a value are absent from the map.
    fn from(header: &[String], line: &str, sep: &LineSep) -> Self {
        let values: HashMap<String, String> =
            header.iter().cloned().zip(sep.split(line)).collect();
        Self {
            values,
            children: Vec::new(),
        }
    }

    /// Renders this node's values as one tab-separated line, in `header`
    /// order. Columns the node has no value for are rendered as empty cells.
    /// Children are not included.
    #[cfg(test)]
    fn to_tsv_line(&self, header: &[String]) -> String {
        header
            .iter()
            .map(|column| self.values.get(column).map_or("", String::as_str))
            .collect::<Vec<_>>()
            .join("\t")
    }
}
/// Returns the indentation level of `line`: the number of leading whitespace
/// characters. Every whitespace character (space, tab, ...) counts as one.
///
/// A line made only of whitespace yields its full character count.
fn get_level(line: &str) -> usize {
    line.chars().take_while(|ch| ch.is_whitespace()).count()
}
/// Parses a run of lines into the sibling nodes found at indentation `level`.
///
/// `lines` is used as a stack: its *last* element is the next line to read,
/// and consumed lines are popped off. For each pending line:
///
/// * same indentation as `level`: the line becomes a new node;
/// * deeper indentation: the line starts a nested group, which is parsed
///   recursively and appended to the children of the most recent node;
/// * shallower indentation: the run is over and the line is left on the
///   stack for the caller.
///
/// Returns `None` when a nested group appears before any node exists to own
/// it, or when a recursive call fails.
fn parse_body(
    header: &Vec<String>,
    lines: &mut Vec<String>,
    level: usize,
    sep: &LineSep,
) -> Option<Vec<Node>> {
    let mut result: Vec<Node> = Vec::new();
    while let Some(line) = lines.last() {
        let line_level = get_level(line);
        if line_level == level {
            result.push(Node::from(header, line, sep));
            lines.pop();
        } else if line_level > level {
            let children = parse_body(header, lines, line_level, sep)?;
            // Append rather than assign: one parent can be followed by several
            // nested groups at different indentation levels (e.g. 8 spaces,
            // then 4). Assigning would silently drop the earlier groups.
            result.last_mut()?.children.extend(children);
        } else {
            break;
        }
    }
    Some(result)
}
/// Parses an indented table into a forest of [`Node`]s.
///
/// Blank (whitespace-only) lines are ignored. The first remaining line is the
/// header: it determines the column separator and the column names. Every
/// following line is a row whose leading whitespace determines its nesting.
///
/// Returns `None` when
/// * the input has no header line,
/// * there is no row after the header,
/// * some row is indented less than the first row, or
/// * the nested structure cannot be built by `parse_body`.
fn parse(inp: &str) -> Option<Vec<Node>> {
    let mut lines = inp.lines().filter(|line| !line.trim().is_empty());
    let header_line = lines.next()?;
    let sep = LineSep::detect(header_line);
    let header = sep.split(header_line);
    // Stored in reverse so that `parse_body` can take the next line with `pop`.
    let mut body: Vec<String> = lines.map(str::to_owned).rev().collect();
    // The first row (last element of the reversed list) sets the root level.
    let initial_level = get_level(body.last()?);
    if body.iter().any(|line| get_level(line) < initial_level) {
        None
    } else {
        parse_body(&header, &mut body, initial_level, &sep)
    }
}
fn main() -> ExitCode {
let inp = io::stdin()
.lines()
.into_iter()
.map(|v| v.expect("error while reading stdin"))
.collect::<Vec<String>>()
.join("\n");
match parse(inp.as_str()) {
Some(v) => {
let s = serde_json::to_string(&v).expect("error making a json");
println!("{s}");
ExitCode::SUCCESS
}
None => {
eprintln!("parsing error");
ExitCode::FAILURE
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
use std::iter::once;
/// Collects every distinct column name used by `vals` or any of their
/// descendants. The order of the returned names is unspecified because they
/// pass through a `HashSet`.
fn all_keys(vals: &Vec<Node>) -> Vec<String> {
    vals.iter()
        .flat_map(|node| {
            // Clone only the keys; the values are not needed here.
            node.values
                .keys()
                .cloned()
                .chain(all_keys(&node.children))
        })
        .collect::<HashSet<_>>()
        .into_iter()
        .collect()
}
/// Renders `vals` depth-first as tab-separated lines.
///
/// Each node produces one line: `level` tab characters followed by its values
/// in `header` order. The lines of its children follow immediately, rendered
/// with `level + 1`.
fn inner_pretty_format(vals: &Vec<Node>, header: &Vec<String>, level: usize) -> Vec<String> {
    vals.iter()
        .flat_map(|node| {
            let line = format!("{}{}", "\t".repeat(level), node.to_tsv_line(header));
            once(line).chain(inner_pretty_format(&node.children, header, level + 1))
        })
        .collect()
}
/// Renders a parsed forest back into text for comparison in tests.
///
/// The first line holds all column names, sorted and joined with tabs. It is
/// followed by one tab-indented, tab-separated line per node, with lines
/// joined by `\n`.
fn pretty_format(vals: &Vec<Node>) -> String {
    let mut columns = all_keys(vals);
    columns.sort();
    let mut lines = vec![columns.join("\t")];
    lines.extend(inner_pretty_format(vals, &columns, 0));
    lines.join("\n")
}
/// Parses a table whose rows do not all fill every header column (e.g.
/// `foo bar` has two values for three columns) and checks the re-rendered
/// output.
///
/// `expected` lists the columns as `a-col key value` because `pretty_format`
/// sorts the column names.
///
/// NOTE(review): the leading whitespace of the fixture lines is what drives
/// nesting in `parse`; confirm the indentation and tab characters inside both
/// literals are intact.
#[test]
fn weird_case() {
// `x` keeps its leading newline: `parse` skips blank lines.
let x = "
key value a-col
hello world one-value
foo bar
x y
dedent value
q w";
// `expected` is compared verbatim, so its leading newline is trimmed away.
let expected = "
a-col key value
one-value hello world
foo bar
x y
dedent value
q w"
.trim_start();
let v = parse(x).unwrap();
assert_eq!(pretty_format(&v), expected)
}
/// Parses a three-column table with rows named `root`, `child1`,
/// `subchild1`, `child2` and `root2`, and checks that rendering the parsed
/// tree reproduces the expected text.
///
/// The last row, `root2`, supplies a value for the first column only.
///
/// NOTE(review): judging by the test name, the fixture is presumably
/// tab-separated and tab-indented; confirm the tab characters (including any
/// trailing ones after `root2` in `expected`) are intact.
#[test]
fn tabs() {
// `x` keeps its leading newline: `parse` skips blank lines.
let x = "
col1 col2 col3
root q w
child1 e r
subchild1 hello world
child2 [ ]
root2";
// `expected` is compared verbatim, so its leading newline is trimmed away.
let expected = "
col1 col2 col3
root q w
child1 e r
subchild1 hello world
child2 [ ]
root2 "
.trim_start();
let v = parse(x).unwrap();
assert_eq!(pretty_format(&v), expected);
}
/// Parses a table with `Name`, `Age` and `Address` columns and checks the
/// re-rendered output, whose columns appear in sorted order
/// (`Address Age Name`) because `pretty_format` sorts the column names.
///
/// NOTE(review): values such as `Bessy the Cow` contain spaces, so the columns
/// are presumably separated by tab characters; confirm the tabs inside both
/// literals are intact.
#[test]
fn plain_tsv() {
// The input ends with a newline; `str::lines` does not yield an extra empty
// line for it, and `parse` would skip a blank line anyway.
let x = "
Name Age Address
Paul 23 1115 W Franklin
Bessy the Cow 5 Big Farm Way
Zeke 45 W Main St
"
.trim_start();
// `expected` is compared verbatim, so its leading newline is trimmed away.
let expected = "
Address Age Name
1115 W Franklin 23 Paul
Big Farm Way 5 Bessy the Cow
W Main St 45 Zeke"
.trim_start();
let v = parse(x).unwrap();
assert_eq!(pretty_format(&v), expected);
}
/// Parses a table whose header contains `|`, so `LineSep::detect` selects the
/// pipe as delimiter, and checks the re-rendered output.
///
/// The spaces around each `|` do not reach the values because
/// `LineSep::split` trims every cell.
///
/// NOTE(review): `pretty_format` joins cells with tab characters; confirm the
/// separators inside `expected` are tabs.
#[test]
fn pipe_separated() {
let x = "
key | value
foo | bar
x | y"
.trim_start();
// `expected` is compared verbatim, so its leading newline is trimmed away.
let expected = "
key value
foo bar
x y"
.trim_start();
let v = parse(x).unwrap();
assert_eq!(pretty_format(&v), expected);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment