Created
November 30, 2015 10:32
-
-
Save NeoTech/1cff9495076ed171c202 to your computer and use it in GitHub Desktop.
csv2xlsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# http://cpansearch.perl.org/src/HMBRAND/Text-CSV_XS-1.01/examples/csv2xls | |
# csv2xls: Convert csv to xls | |
# (m)'11 [06 Oct 2011] Copyright H.M.Brand 2007-2013 | |
# modified 09/24/2013-11/05/2013 David Rotman, Cedarville University | |
# Many of these changes are stylistic changes based on personal preferences. | |
# The original script by H.M.Brand can be used without concern for accurateness | |
# and functionality. | |
# | |
# This version of the script explicitly identifies input and output files, so this | |
# version is not suitable for piping (receiving input from a previous program | |
# rather than from a file) nor for processing several input files in one invocation. | |
# The specification of input file and output files fits well in the Unidata/Universe | |
# environment (used by the Cedarville Download utility), whereas piping is not essential. | |
# | |
# Summary of changes: | |
# produce xlsx file instead of xls | |
# change script name to csv2xlsx.pl | |
# do not add 'xlsx' suffix to output filename if filename is specified explicitly | |
# include some additional error checking | |
# rename some variables ($title becomes $inputfile, $xlsx becomes $outputfile, etc.) | |
# adjust status display messages based on existing verbosity option | |
# make some cosmetic code changes (use explicit if statements, show more structure on setup) | |
# allow specifying input and output files via command-line switches (-i and -o) | |
# Dependencies: | |
# /usr/bin/perl -MCPAN -e'install Text::CSV_XS' | |
# /usr/bin/perl -MCPAN -e'install Date::Calc' | |
# /usr/bin/perl -MCPAN -e 'install Excel::Writer::XLSX' | |
# | |
use strict; | |
use warnings; | |
#our $VERSION = "1.71"; base script from H.M.Brand | |
my $scriptversion = "2.01 (David Rotman, Cedarville University, 11/05/2013)"; | |
my $separator = ''; # Set after reading first line in attempt to auto-detect | |
my $quotechar = '"'; | |
my $escapechar = '"'; | |
my $wanthelp = 0; | |
my $mincolumnwidth = 4; # Default minimal column width | |
my $inputfile = ''; # Input file (CSV, tab delimited, etc.) | |
my $outputfile = ''; # Excel output file name | |
my $forcefile = 0; # Force use of file (do not prompt prior to overwriting) | |
my $utf = 0; # Data is encoded in Unicode | |
my $allowformulas = 0; # Allow formulas | |
my $dateformat = "dd-mm-yyyy"; | |
my $currencyformat = '$###,###,##0.00'; | |
my $verbosity = 1; # range from 1 to 4 | |
my $datecolumns; | |
my $remainingchar; | |
my $firstchar; | |
sub usage | |
{ | |
my $err = shift and select STDERR; | |
print <<EOU; | |
usage: perl csv2xlsx.pl [-s <sep>] [-e <esc>] [-q <quot>] [-w <width>] [-d <dtfmt>] | |
[-F] [-f] [-D <cols>] [-u] [-v <level>] | |
[-o <outputfilename>] | |
[-i <inputfilename>] | |
[inputfilename] [outputfilename] | |
-s <sep> use <sep> as separator character, auto-detect, default = ',' | |
The string "tab" is allowed. | |
-e <esc> use <esc> as escape character, auto-detect, default = '$escapechar' | |
The string "undef" is allowed. | |
-q <quot> use <quot> as quotation character, default = '$quotechar' | |
The string "undef" will disable quotation. | |
-w <width> use <width> as minimum column width, default = $mincolumnwidth | |
-d <dtfmt> use <dtfmt> as date format, default = '$dateformat' | |
-F allow formulas, otherwise fields starting with an equal sign | |
are forced to string | |
-f force usage of <outputfilename> if it already exists (unlink before use) | |
-D <cols> only convert dates in columns <cols>, default is everywhere | |
-u input file is stored in UTF8 character set | |
-v <level> verbosity, default = $verbosity, maximum = 4 | |
-i <inputfilename> use <inputfilename> for input to the routine | |
-o <outputfilename> write output to file named <outputfilename>, defaults | |
to input file name with .csv replaced with .xlsx | |
Examples: | |
perl csv2xlsx.pl sales.csv (creates sales.xlsx) | |
perl csv2xlsx.pl -o report.xlsx sales.csv (creates report.xlsx) | |
perl csv2xlsx.pl -i sales.csv -o report.xlsx (creates report.xlsx) | |
perl csv2xlsx.pl sales.csv report.xlsx (creates report.xlsx) | |
perl csv2xlsx.pl -v 4 sales.csv report.xlsx (creates report.xlsx, shows progress msgs) | |
EOU | |
exit $err; | |
} # usage | |
use Getopt::Long qw(:config bundling nopermute passthrough); | |
GetOptions ( | |
"help|h|?" => \$wanthelp , | |
"s=s" => \$separator, | |
"q=s" => \$quotechar, | |
"e=s" => \$escapechar, | |
"w=i" => \$mincolumnwidth, | |
"o=s" => \$outputfile, | |
"i=s" => \$inputfile, | |
"d=s" => \$dateformat, | |
"D=s" => \$datecolumns, | |
"f" => \$forcefile, | |
"F" => \$allowformulas, | |
"u" => \$utf, | |
"v:1" => \$verbosity, | |
); | |
if ($wanthelp) { | |
usage(1); | |
} | |
if ($verbosity > 1) { | |
print STDERR "\ncsv2xlsx.pl script version " . $scriptversion . "\n\n"; | |
} | |
my $numargs = $#ARGV + 1; | |
if (($numargs < 1) and ($inputfile eq "")) { | |
usage(1); | |
} | |
if ($inputfile eq "") { | |
$inputfile = $ARGV[0]; | |
} | |
if ($verbosity > 1) { | |
print STDERR "Input file is " . $inputfile . "\n"; | |
} | |
if (!(-f $inputfile)) { | |
print STDERR $inputfile . " does not exist.\n"; | |
exit 1; | |
} | |
if ($outputfile eq "") { | |
if ($numargs > 1) { | |
$outputfile = $ARGV[1]; | |
} else { | |
($outputfile ||= $inputfile) =~ s/(?:\.csv)?$/.xlsx/i; | |
} | |
} | |
if ($verbosity > 1) { | |
print STDERR "Output file is " . $outputfile . "\n"; | |
} | |
-s $outputfile && $forcefile and unlink $outputfile; | |
if (-s $outputfile) { | |
print STDERR "File '$outputfile' already exists. Overwrite? [y/N] > N\b"; | |
scalar <STDIN> =~ m/^[yj](es|a)?$/i or exit; | |
} | |
# Don't split ourselves when modules do it _much_ better, and follow the standards | |
use Text::CSV_XS; | |
use Date::Calc qw( Delta_Days Days_in_Month ); | |
use Excel::Writer::XLSX; | |
use Encode qw( from_to ); | |
my $csv; | |
my $wbk = Excel::Writer::XLSX->new ($outputfile); | |
my $wks = $wbk->add_worksheet (); | |
$dateformat =~ s/j/y/g; | |
my %fmt = ( | |
date => $wbk->add_format ( | |
num_format => $dateformat, | |
align => "center", | |
), | |
rest => $wbk->add_format ( | |
align => "left", | |
), | |
); | |
my $wbkcurrencyformat = $wbk->add_format(); | |
$wbkcurrencyformat->set_num_format($currencyformat); | |
my ($numrows, $numcols, @numcols) = (0, 1); # data height, data width, and default column widths | |
my $row; | |
my $firstline = ''; | |
open my $inputfh, $inputfile or die "unable to open " . $inputfile . "\n"; | |
unless ($separator) { # No sep char passed, try to auto-detect; | |
while (<$inputfh>) { | |
m/\S/ or next; # Skip empty leading blank lines | |
$separator = # start auto-detect with quoted strings | |
m/["\d];["\d;]/ ? ";" : | |
m/["\d],["\d,]/ ? "," : | |
m/["\d]\t["\d,]/ ? "\t" : | |
# If neither, then for unquoted strings | |
m/\w;[\w;]/ ? ";" : | |
m/\w,[\w,]/ ? "," : | |
m/\w\t[\w,]/ ? "\t" : | |
";" ; | |
# Yeah I know it should be a ',' (hence Csv), but the majority | |
# of the csv files to be shown uses semiColon ';' instead. | |
$firstline = $_; | |
last; | |
} | |
} | |
$csv = Text::CSV_XS-> new ({ | |
sep_char => $separator eq "tab" ? "\t" : $separator, | |
quote_char => $quotechar eq "undef" ? undef : $quotechar, | |
escape_char => $escapechar eq "undef" ? undef : $escapechar, | |
binary => 1, | |
keep_meta_info => 1, | |
auto_diag => 1, | |
}); | |
if ($firstline) { | |
$csv->parse ($firstline) or die $csv->error_diag (); | |
$row = [ $csv->fields ]; | |
} | |
if ($verbosity > 3) { | |
foreach my $k (qw( sep_char quote_char escape_char )) { | |
my $c = $csv->$k () || "undef"; | |
$c =~ s/\t/\\t/g; | |
$c =~ s/\r/\\r/g; | |
$c =~ s/\n/\\n/g; | |
$c =~ s/\0/\\0/g; | |
$c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge; | |
printf STDERR "%-11s = %s\n", $k, $c; | |
} | |
} | |
if (my $rows = $datecolumns) { | |
$rows =~ s/-$/-999/; # 3,6- | |
$rows =~ s/-/../g; | |
eval "\$datecolumns = { map { \$_ => 1 } $rows }"; | |
} | |
while ($row && @$row or $row = $csv->getline ($inputfh)) { | |
my @row = @$row; | |
@row > $numcols and push @numcols, ($mincolumnwidth) x (($numcols = @row) - @numcols); | |
foreach my $c (0 .. $#row) { | |
my $val = $row[$c] || ""; | |
my $l = length $val; | |
$l > $numcols[$c] and $numcols[$c] = $l; | |
if ($utf and $csv->is_binary ($c)) { | |
from_to ($val, "utf-8", "ucs2"); | |
$wks->write_unicode ($numrows, $c, $val); | |
next; | |
} # utf | |
if ($csv->is_quoted ($c)) { | |
if ($utf) { | |
from_to ($val, "utf-8", "ucs2"); | |
$wks->write_unicode ($numrows, $c, $val); | |
} else { | |
$wks->write_string ($numrows, $c, $val); | |
} | |
next; | |
} # is_quoted | |
if (!$datecolumns or $datecolumns->{$c + 1}) { | |
my @d = (0, 0, 0); # Y, M, D | |
$val =~ m/^(\d{4})(\d{2})(\d{2})$/ and @d = ($1, $2, $3); | |
$val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1); | |
if ( $d[2] >= 1 && $d[2] <= 31 && | |
$d[1] >= 1 && $d[1] <= 12 && | |
$d[0] >= 1900 && $d[0] <= 2199) { | |
my $dm = Days_in_Month (@d[0,1]); | |
$d[2] < 1 and $d[2] = 1; | |
$d[2] > $dm and $d[2] = $dm; | |
my $dt = 2 + Delta_Days (1900, 1, 1, @d); | |
$wks->write ($numrows, $c, $dt, $fmt{date}); | |
next; | |
} | |
} # datecolumns | |
if (!$allowformulas && $val =~ m/^=/) { | |
$wks->write_string ($numrows, $c, $val); | |
} else { | |
$firstchar = substr $val, 0, 1; | |
#print STDERR $firstchar . " val " . $val . "\n"; | |
if ($firstchar eq '$') { | |
$remainingchar = substr $val, 1; | |
#print STDERR "other string is " . $remainingchar . "\n"; | |
$wks->write ($numrows, $c, $remainingchar, $wbkcurrencyformat); | |
} else { | |
$wks->write ($numrows, $c, $val); | |
} | |
} # allowformulas | |
} # foreach | |
++$numrows; | |
if (!($numrows % 10) and ($verbosity > 2)) { | |
printf STDERR "*"; | |
if (!($numrows % 500)) { | |
printf STDERR " processed %6d columns x %6d rows\n", $numcols, $numrows; | |
} | |
} | |
} continue { $row = undef } | |
if ($verbosity > 1) { | |
printf STDERR "\nCreated %s with %6d columns %6d rows\n", $outputfile, $numcols, $numrows; | |
} | |
$wks->set_column ($_, $_, $numcols[$_]) for 0 .. $#numcols; | |
$wbk->close (); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment