Skip to content

Instantly share code, notes, and snippets.

@labster
Last active December 17, 2015 00:10
Show Gist options
  • Save labster/5519220 to your computer and use it in GitHub Desktop.
Save labster/5519220 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use 5.10.0;
use strict;
use warnings;
use Tie::File;
# Row interval at which the main loop closes the current output file and
# starts the next one (a new file begins whenever the row index is a
# multiple of this value).
my $MAX_LINES = 100_000;
# Zero-based indexes of the pipe-delimited input columns that are copied to
# the output; passed to getdatafields for the header row and every data row.
my @FIELDS = (7, 8, 9, 14, 17, 18, 19, 25, 30, 31, 35, 36, 37, 43, 44, 46, 47, 50, 52, 55, 62, 63, 66, 67, 68, 75, 76, 77, 81);
# Build the name of an output chunk file.
#
# Takes the chunk number and returns the file name "data<number>.txt"
# (relative to the current working directory).
sub nextfilename {
    my ($chunk_number) = @_;
    return 'data' . $chunk_number . '.txt';
}
# Extract selected columns from one pipe-delimited record.
#
# Arguments:
#   $line   - the record text, with fields separated by "|"
#   @fields - zero-based indexes of the fields to keep, in output order
#
# Returns the selected fields re-joined with "|". A field that does not
# exist in the record (index past the last field) is emitted as an empty
# string.
sub getdatafields {
    my ($line, @fields) = @_;

    # A limit of -1 keeps trailing empty fields. Without it split silently
    # drops them, so a record ending in "||" produced undef slots in the
    # slice below.
    my @F = split /\|/, ($line // ''), -1;

    # Records shorter than the largest requested index still leave holes in
    # the slice; turn them into empty strings so join does not raise an
    # "uninitialized value" warning for every such row.
    return join '|', map { $_ // '' } @F[@fields];
}
# Open output chunk number $chunk for writing and put the header row on its
# first line. Returns the open file handle; dies if the file cannot be
# created or written.
sub _open_chunk {
    my ($chunk, $header_row) = @_;
    my $name = nextfilename($chunk);
    open(my $fh, ">", $name) or die "can't open $name for writing: $!";
    # say, not print: the header has no trailing newline of its own, and
    # without one it would run straight into the first data row.
    say {$fh} $header_row or die "can't write to $name: $!";
    return $fh;
}

# Main program: split the pipe-delimited input file named on the command line
# into chunk files (data0.txt, data1.txt, ...), keeping only the columns
# listed in @FIELDS and repeating the header row at the top of every chunk.
# Each chunk holds up to $MAX_LINES data rows.
my $infilename = shift @ARGV;
die "no input specified" unless defined $infilename && length $infilename;

# Stream the input through a read-only handle. Taking the header with shift()
# on a Tie::File array removes that line from the file on disk, so the input
# is deliberately never opened for modification here.
open(my $infile, "<", $infilename)
    or die "can't open data file $infilename: $!";

my $header_line = <$infile>;
die "data file $infilename is empty" unless defined $header_line;
chomp $header_line;
my $header_row = getdatafields($header_line, @FIELDS);

my $outfile = _open_chunk(0, $header_row);

# Zero-based index of the current data row (the header is not counted).
# Row N goes to chunk int(N / $MAX_LINES).
my $row = 0;
while (my $line = <$infile>) {
    chomp $line;
    if ($row > 0 && ($row % $MAX_LINES) == 0) {
        my $chunk = int($row / $MAX_LINES);
        # Progress report; the observation number is 1-based.
        print "working on file " . $chunk
            . " starting at observation # " . ($row + 1) . "\n";
        # Buffered write errors only surface at close, so check it.
        close $outfile or die "can't close " . nextfilename($chunk - 1) . ": $!";
        $outfile = _open_chunk($chunk, $header_row);
    }
    say {$outfile} getdatafields($line, @FIELDS);
    $row++;
}

close $outfile or die "can't close output file: $!";
close $infile;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment