Skip to content

Instantly share code, notes, and snippets.

@matthallamew
Last active September 16, 2015 15:45
Show Gist options
  • Save matthallamew/dc9902bb2fdaa62b0132 to your computer and use it in GitHub Desktop.
Save matthallamew/dc9902bb2fdaa62b0132 to your computer and use it in GitHub Desktop.
Analyze and display user defined columns from a CSV file.
#!/usr/bin/env perl
#
# Analyze CSV files in order to see only the columns you specify.
# Display the headers of the CSV in order to get column names and/or numbers.
# Use the column names/numbers in order to only display those columns to standard out.
#
use strict;
use Getopt::Std;
my %options=();
getopts("c:laf:s:", \%options);
init();
sub init{
if($options{f}){
if($options{l}){
listhead();
}
elsif($options{a}){
if($options{c}){
analyze();
} else { die "Did not include -c <column name>,<column name>\n\n"; }
}
}
else { die "Did not include -f <filename>\n\n"; }
}
###############
#
# Display the header of the input file
#
# ./csvTool -l -f file.csv [-s '\|']
#
###############
sub listhead {
my $filName;
my $separator;
# Check to make sure the proper switch and a filename were provided
if($options{f}){
$filName = "$options{f}";
}else { die "Did not include -f <filename>\n\n";}
$separator = "$options{s}" || ",";
# Read first line to get header row
open(FILE1,"<", "$filName") or die "Could not read from $filName!";
my $header = <FILE1>;
my @fileFields = split(/$separator/, uc($header));
my $fileFieldsLen = @fileFields;
my %index;
@index{@fileFields} = (0..$#fileFields);
for my $field (@fileFields){
my $idx = $index{$field} + 1;
print "$idx $field\n";
}
}
###############
#
# Analyze input file
#
# ./csvTool -a -f file.csv -c 1,2,3 [-s '\t']
#
# ./csvTool -a -f file.csv -c id,email [-s ';']
#
###############
sub analyze {
my $filName;
my @inFields;
my $separator;
# Check to make sure the proper switch and a filename were provided
if($options{f}){
$filName = "$options{f}";
}else { die "Did not include -f <filename>\n\n"; }
if($options{c}){
@inFields = split(/,/,$options{c});
}else { die "Did not include -c <column name>,<column name>\n\n"; }
$separator = "$options{s}" || ",";
# Read first line to get header row
open(FILE1,"<", "$filName") or die "Could not read from $filName!";
my $header = <FILE1>;
my @fileFields = split(/$separator/, uc($header));
my $fileFieldsLen = @fileFields;
# Get column numbers of each user requested field
my $useFields = undef;
my %index;
@index{@fileFields} = (0..$#fileFields);
for my $inField (@inFields){
$inField =~ s/^\s+|\s+$//g;
$inField = uc($inField);
if($inField =~ /[0-9]/){
for(my $c = 0; $c < $fileFieldsLen; $c++){
if($c eq $inField - 1){
$useFields .= $inField - 1 .",";
last;
}
}
} else{
$useFields .= "$index{$inField},";
}
}
$useFields = join(",",split(/,/,$useFields));
# Read the rest of the file, displaying only those columns that the user requested
my $cnt = 0;
while (!eof(FILE1)) {
my $line = <FILE1>;
$line =~ tr/\"//d;
chomp($line);
my @row = split(/$separator/,$line);
my $reportRows = undef;
for my $c (split(/,/,$useFields)){
$reportRows = $reportRows . "$row[$c] ";
}
print "$reportRows\n";
$cnt++;
}
close FILE1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment