Skip to content

Instantly share code, notes, and snippets.

@tallpeak
Created June 23, 2022 19:22
Show Gist options
  • Save tallpeak/d9e2c1c67315e62b3df02afda65cd047 to your computer and use it in GitHub Desktop.
Save tallpeak/d9e2c1c67315e62b3df02afda65cd047 to your computer and use it in GitHub Desktop.
superfile to tab-separated-values
// fixedWidthToCSV: converts a fixed-width file to tab-separated values (TSV, not CSV, despite the name)
// output is short by 545 lines
// wc c:\Users\tallp\Downloads\SFA8 c:\temp\sfa8.txt
// 1167801 12474922 411065952 c:\Users\tallp\Downloads\SFA8
// 1167256 40205289 449393560 c:\temp\sfa8.txt
// 2335057 52680211 860459512 total
// https://www.youtube.com/watch?v=lLWchWTUFOQ
// Ryan Levick
// oxide.computer
extern crate flate2;
use std::env;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::io::LineWriter;
use std::io::Write;
// use encoding::{DecoderTrap};//,Encoding
// use encoding::all::ISO_8859_1;
// use encoding_rs::*;
use flate2::read::GzDecoder;
//use bstr::io::BufReadExt;
// use std::fmt::Write;
// the following assumes utf8
// The output is wrapped in a Result to allow matching on errors
// Returns an Iterator to the Reader of the lines of the file.
// fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
// where P: AsRef<Path>, {
// let file = File::open(filename)?;
// Ok(io::BufReader::with_capacity(65536,file).lines())
// }
// This program converts a gzip-compressed fixed-width file (called Superfile,
// or SFA8.TXT) to tab-delimited text suitable for import into PostgreSQL
// with the COPY FROM statement.
/// Appends one fixed-width `record` to `out` as a single tab-separated line.
///
/// `widths` gives the byte width of each column, in order. Every column is
/// decoded with `enc`, trimmed of surrounding whitespace and separated from
/// the next one by a tab; the line is terminated with `\n`.
///
/// A record shorter than the sum of `widths` yields empty strings for the
/// columns it does not reach instead of panicking on an out-of-range slice.
/// Bytes beyond the last column are ignored.
///
/// NOTE(review): tabs or backslashes embedded *inside* a field are written
/// as-is; PostgreSQL's COPY text format treats both specially, so such data
/// would need escaping — confirm whether the input can contain them.
fn append_tsv_line(
    record: &[u8],
    widths: &[u8],
    enc: &'static encoding_rs::Encoding,
    out: &mut String,
) {
    let mut start = 0usize;
    for (index, &width) in widths.iter().enumerate() {
        if index > 0 {
            out.push('\t');
        }
        let width = usize::from(width);
        // Clamp both ends to the record length so short records cannot panic.
        let begin = start.min(record.len());
        let end = (start + width).min(record.len());
        // Fields are arbitrary slices of a line, not documents: a field that
        // happens to start with BOM-like bytes must not switch the decoder.
        let (text, _had_errors) = enc.decode_without_bom_handling(&record[begin..end]);
        out.push_str(text.trim());
        start += width;
    }
    out.push('\n');
}

/// Converts `<home>\Downloads\SFA8.gz` (gzip-compressed, fixed-width records
/// separated by `\n`) into the tab-separated file `c:\temp\sfa8.txt`.
///
/// # Errors
///
/// Returns the underlying I/O error if the home directory cannot be
/// determined, if either file cannot be opened/created, or if reading,
/// decompressing or writing fails part-way through. Read errors are
/// propagated rather than being treated as end-of-input, so a corrupt
/// archive can no longer produce a silently truncated output file.
fn main() -> std::io::Result<()> {
    let sftxt_path = Path::new("c:\\temp\\sfa8.txt");
    let sftxt_file = File::create(sftxt_path).map_err(|why| {
        io::Error::new(why.kind(), format!("couldn't create sfa8.txt: {}", why))
    })?;
    // A plain buffered writer: `LineWriter` flushes at every newline, which
    // would turn each output line into its own write call despite the buffer.
    let mut sftxt = io::BufWriter::with_capacity(65536, sftxt_file);

    // Byte widths of the 34 columns of one record (350 bytes in total).
    let sflens: [u8; 34] = [
        2, 8, 6, 5, 7, 1, 30, 30, 30, 30,
        13, 5, 4, 30, 10, 9, 9, 4, 4, 8,
        8, 8, 8, 8, 5, 3, 3, 5, 12, 9,
        11, 3, 17, 5,
    ];

    let home = dirs::home_dir().ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::NotFound,
            "couldn't determine the home directory",
        )
    })?;
    let filename = home.join("Downloads").join("SFA8.gz");
    let file = File::open(&filename).map_err(|why| {
        io::Error::new(
            why.kind(),
            format!("couldn't open {}: {}", filename.display(), why),
        )
    })?;
    // NOTE(review): `GzDecoder` reads a single gzip member; if the archive is
    // ever a multi-member stream, `MultiGzDecoder` would be needed — confirm.
    let gz = GzDecoder::new(file);
    let mut rdr = io::BufReader::with_capacity(65536, gz);

    // IBM437 does not exist in encoding_rs, so the "l1" (Latin-1) label is used.
    let enc = encoding_rs::Encoding::for_label(b"l1").expect("failed to find encoding");
    println!("using encoding: {}", enc.name());

    // Both buffers are reused for every record to avoid per-line allocations.
    let mut buf: Vec<u8> = Vec::with_capacity(500);
    let mut line = String::with_capacity(400);
    loop {
        buf.clear();
        if rdr.read_until(b'\n', &mut buf)? == 0 {
            break; // end of input
        }

        // Drop the line terminator (LF, optionally preceded by CR).
        let mut record = buf.as_slice();
        if let [rest @ .., b'\n'] = record {
            record = rest;
        }
        if let [rest @ .., b'\r'] = record {
            record = rest;
        }
        if record.is_empty() {
            // A blank line carries no record; skip it rather than emit a row
            // made of nothing but empty columns.
            continue;
        }

        line.clear();
        append_tsv_line(record, &sflens, enc, &mut line);
        sftxt.write_all(line.as_bytes()).map_err(|why| {
            io::Error::new(why.kind(), format!("couldn't write to sfa8.txt: {}", why))
        })?;
    }

    // Flush explicitly: errors during the implicit flush on drop are lost.
    sftxt.flush()?;
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment