Skip to content

Instantly share code, notes, and snippets.

@dfsnow
Last active December 4, 2022 03:28
Show Gist options
  • Save dfsnow/112621a2d9ea6e19876017fb776cf133 to your computer and use it in GitHub Desktop.
Save dfsnow/112621a2d9ea6e19876017fb776cf133 to your computer and use it in GitHub Desktop.
Code to pivot origin-destination (OD) matrices from long to wide format (via bash pipe/stdin)
# Create an array of columns (destinations) from a separate input file.
# The file path is taken from the awk variable `dests`, which this script
# never assigns (presumably passed with `-v dests=FILE` -- confirm with caller).
# The file may hold one comma-separated line or one destination per line.
BEGIN {
    FS = OFS = ","
    while ((getline line < dests) > 0) {
        # Separate consecutive lines with a comma so that a file with one
        # destination per line does not collapse into a single column; skip
        # the separator when the text read so far already ends with one.
        if (contents != "" && contents !~ /,$/) {
            contents = contents FS
        }
        contents = contents line
    }
    # Release the file handle now that every line has been read
    close(dests)
    numCols = split(contents, cols)
}
# Build the CSV header from the first input line: keep its first field,
# then append every destination as its own column
FNR == 1 {
    header = $1
    for (c = 1; c <= numCols; c++) {
        header = header OFS cols[c]
    }
    print header
    next
}
# Loop through the data rows, storing each value under its destination.
# Rows for one origin are expected to be contiguous: whenever the origin
# (field 1) differs from the previous row's, the finished row is printed
# and a new one is started.
$1 != prev[1] {
    # FNR == 2 is the first data row, so there is nothing to flush yet
    if (FNR > 2) {
        prt()
    }
    split($0, prev)
}
{ values[$2] = $3 }
# Flush the last origin, but only when at least one data row was read;
# otherwise empty or header-only input would emit a row with no origin
END {
    if (NR > 1) {
        prt()
    }
}
# Print one wide row for the origin held in prev[1]: the origin followed by
# one integer-formatted value per destination column (destinations with no
# stored value format as 0), then reset the stored values for the next origin.
# The parameters are awk-style local variables; callers pass no arguments.
function prt(    i, dest, cell, row) {
    row = prev[1]
    for (i = 1; i <= numCols; i++) {
        dest = cols[i]
        cell = values[dest]
        row = row OFS sprintf("%d", cell)
    }
    print row
    # Clear the whole array
    split("", values)
}
use itertools::Itertools;
use std::collections::BTreeMap;
use std::env::args;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::iter::Iterator;
use std::path::Path;
fn main() {
// Read single arg, the file path containing a list of destinations
let args: Vec<String> = args().collect();
let file_path = &args[1];
// Load destinations from the file and print first row
let lines = lines_from_file(file_path).expect("Could not load lines");
let joined_lines = lines.join(",");
println!("origin,{}", joined_lines);
// Create a BTreeMap using destinations as keys
let mut dests: BTreeMap<String, String> = BTreeMap::new();
for line in lines {
dests.insert(line, String::new());
}
let mut map = dests.clone();
// Populate rows (BTree values) from piped input
// Print to stdout once entire group (origin) is processed
let mut prev_origin: String = String::new();
let stdin = io::stdin();
for line in stdin.lock().lines().skip(1) {
let line = line.expect("Could not read line from standard in");
// Split each line into origin, destination, and minutes
let mut iter = line.splitn(3, ',');
let (origin, destination, value) = (
iter.next().unwrap().trim(),
iter.next().unwrap().trim(),
iter.next().unwrap().trim(),
);
if origin != prev_origin {
if !prev_origin.is_empty() {
let values: String = map.values().join(",");
println!("{},{}", prev_origin, values);
}
map = dests.clone();
}
map.insert(destination.to_string(), value.to_string());
prev_origin.clear();
prev_origin.push_str(origin);
}
let values: String = map.values().join(",");
println!("{},{}", prev_origin, values);
}
/// Reads the file at `filename` and returns its lines, without line
/// terminators, in file order.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or if any
/// line cannot be read.
fn lines_from_file(filename: impl AsRef<Path>) -> io::Result<Vec<String>> {
    let reader = BufReader::new(File::open(filename)?);
    let mut collected = Vec::new();
    for entry in reader.lines() {
        // Stop at the first line that fails to read
        collected.push(entry?);
    }
    Ok(collected)
}
use std::env::args;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::iter::repeat;
use std::path::Path;
/// Pivots a long `origin,destination,value` CSV read from standard input into
/// a wide CSV on standard output, streaming one output row per origin.
///
/// The single command-line argument is the path of a file listing the
/// destinations; their order defines the column order. The first line of
/// standard input is discarded as a header. Each output row is built by
/// scanning forward through the destination list, so rows for one origin must
/// be contiguous and must list their destinations in the same order as the
/// destinations file. Destinations with no value for an origin are left empty.
///
/// # Panics
///
/// Panics if the argument is missing, if the destinations file or standard
/// input cannot be read, if a non-blank input line has fewer than three
/// comma-separated fields, or if a destination is missing from the
/// destinations file or arrives out of order.
fn main() {
    // Read single arg, the file path containing a list of destinations
    let args: Vec<String> = args().collect();
    let file_path = args
        .get(1)
        .expect("Expected the path of the destinations file as the first argument");

    // Load destinations from the file and print first row
    let lines = lines_from_file(file_path).expect("Could not load lines");
    let joined_lines = lines.join(",");
    println!("origin,{}", joined_lines);

    // Convert destinations to a vector, then create a string of
    // commas with the same length
    let keys = joined_lines.split(',').collect::<Vec<_>>();
    let commas: String = repeat(',').take(keys.len()).collect();

    let mut prev_origin = String::new();
    let mut line = String::new();
    let mut stdin = io::stdin().lock();

    // Consume and discard the header line
    stdin
        .read_line(&mut line)
        .expect("Could not read first line from standard in");

    // `idx` is the number of destination columns already written for the
    // current origin
    let mut idx: usize = 0;
    let mut line_out = String::with_capacity(joined_lines.len());
    loop {
        line.clear();
        match stdin.read_line(&mut line) {
            Ok(0) => break,
            Ok(_) => (),
            Err(_) => panic!("Could not read line from standard in"),
        }
        // Tolerate blank lines (e.g. a trailing newline at the end of input)
        if line.trim().is_empty() {
            continue;
        }

        // Split each read line into origin, destination, and minutes
        let mut fields = line.splitn(3, ',');
        let (origin, destination, value) = match (fields.next(), fields.next(), fields.next()) {
            (Some(o), Some(d), Some(v)) => (o.trim(), d.trim(), v.trim()),
            _ => panic!(
                "Expected `origin,destination,value` but got: {}",
                line.trim_end()
            ),
        };

        // When the origin changes write the line of values to stdout
        // then reset the buffers for the new origin
        if origin != prev_origin {
            if !prev_origin.is_empty() {
                // Pad the columns that never received a value
                line_out.push_str(&commas[idx..]);
                println!("{}", line_out);
            }
            idx = 0;
            prev_origin.clear();
            prev_origin.push_str(origin);
            line_out.clear();
            line_out.push_str(origin);
        }

        // Count how many columns lie between the last written column and
        // this destination. Searching only the remaining keys replaces an
        // out-of-bounds index panic with an explicit, descriptive one.
        let skipped = keys[idx..]
            .iter()
            .position(|key| *key == destination)
            .unwrap_or_else(|| {
                panic!(
                    "Destination `{}` for origin `{}` is missing from the destinations file or out of order",
                    destination, origin
                )
            });

        // One comma per skipped (empty) column plus the separator that
        // precedes this value
        line_out.push_str(&commas[..skipped + 1]);
        line_out.push_str(value);
        idx += skipped + 1;
    }

    // Flush the final origin, unless no data rows were read at all
    if !prev_origin.is_empty() {
        line_out.push_str(&commas[idx..]);
        println!("{}", line_out);
    }
}
/// Opens `filename` and gathers every line of it, stripped of its line
/// terminator, into a vector that preserves file order.
///
/// # Errors
///
/// Propagates the I/O error from opening the file or from the first line
/// that fails to read.
fn lines_from_file(filename: impl AsRef<Path>) -> io::Result<Vec<String>> {
    let file = File::open(filename)?;
    let mut lines = Vec::new();
    for line in BufReader::new(file).lines() {
        lines.push(line?);
    }
    Ok(lines)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment