Skip to content

Instantly share code, notes, and snippets.

@labster
Last active December 16, 2015 23:50
Show Gist options
  • Save labster/5516929 to your computer and use it in GitHub Desktop.
Save labster/5516929 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use 5.10.0;
use strict;
use warnings;
# A new output file is started each time the running input-row counter
# reaches a multiple of this value.
my $MAX_LINES = 100_000;

# Positions (array indexes into the pipe-split record) of the columns that
# are copied to the output, in output order.
my @FIELDS = (
     7 ..  9,
    14,
    17 .. 19,
    25, 30, 31,
    35 .. 37,
    43, 44, 46, 47,
    50, 52, 55,
    62, 63,
    66 .. 68,
    75 .. 77,
    81,
);
# nextfilename($num)
#
# Build the name of an output file from its sequence number.
#   $num - value placed between the "data" prefix and the ".txt" suffix.
# Returns the string "data<num>.txt".
sub nextfilename {
    my ($sequence) = @_;
    my $name = 'data' . $sequence . '.txt';
    return $name;
}
# getdatafields($line, @fields)
#
# Pick selected columns out of one pipe-delimited record.
#   $line   - the record text; it is split on the literal "|" character.
#   @fields - array indexes of the columns to keep, in the order they
#             should appear in the result.
# Returns the chosen columns joined back together with "|".  The line is
# not chomped here, so any trailing newline stays attached to the last
# column of the record.
sub getdatafields {
    my ($record, @wanted) = @_;
    my @columns  = split /\|/, $record;
    my @selected = map { $columns[$_] } @wanted;
    return join "|", @selected;
}
# ---------------------------------------------------------------------------
# Main script: split one pipe-delimited input file into data0.txt,
# data1.txt, ... keeping only the columns listed in @FIELDS.  Every output
# file begins with the (column-filtered) header row of the input.
#
# Usage: script.pl INPUT_FILE
# ---------------------------------------------------------------------------

# Test definedness/length rather than truth so a file literally named "0"
# is accepted.
my $infilename = shift @ARGV;
die "no input specified" unless defined $infilename && length $infilename;

# Three-argument open: the filename can never be interpreted as a mode.
my $infile;
open($infile, '<', $infilename)
    or die "can't open data file '$infilename': $!";

# Read exactly ONE line for the header.  The readline must be in scalar
# context: written directly inside an argument list it would be evaluated in
# list context and consume the entire file.
my $header_line = <$infile>;
defined $header_line or die "no header row in '$infilename'";
chomp $header_line;
my $header_row = getdatafields($header_line, @FIELDS);

my $outname = nextfilename(0);
my $outfile;
open($outfile, '>', $outname)
    or die "can't open $outname for writing: $!";
say {$outfile} $header_row;

# $row is the 1-based number of the data row being processed.  A new output
# file is started whenever $row is a multiple of $MAX_LINES, so the first
# file holds $MAX_LINES - 1 data rows and each later file up to $MAX_LINES.
my $row = 0;
while (my $line = <$infile>) {    # stream line by line; do not slurp
    $row++;

    if ($row % $MAX_LINES == 0) {
        my $file_num = $row / $MAX_LINES;
        print "working on file $file_num starting at observation # $row\n";

        # Buffered write errors only surface at close, so check it.
        close $outfile or die "can't close $outname: $!";

        $outname = nextfilename($file_num);
        open($outfile, '>', $outname)
            or die "can't open $outname for writing: $!";
        say {$outfile} $header_row;
    }

    # Strip the record terminator so that selecting the last column does not
    # embed a newline; say() supplies exactly one newline per output row.
    chomp $line;
    say {$outfile} getdatafields($line, @FIELDS);
}

close $outfile or die "can't close $outname: $!";
close $infile;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment