Skip to content

Instantly share code, notes, and snippets.

@vasansr
Created December 31, 2016 07:26
Show Gist options
  • Save vasansr/07420a402a74c1b559ffd6767eb66747 to your computer and use it in GitHub Desktop.
Save vasansr/07420a402a74c1b559ffd6767eb66747 to your computer and use it in GitHub Desktop.
Create SQL commands based on a CSV file (perl)
#!/usr/bin/perl
#
use Text::CSV_XS;
use IO::Handle;
use Getopt::Long;
sub usage {
print "Usage: csv2sql.pl [OPTIONS] <csvfile>\n";
print "Dump sql statements based on input csv\n";
print "Conventions: \n";
print " Table name is derived from of csv file name by stripping .csv\n";
print " First line of csv file contains column names\n";
print " Columns with names beginning with a # are ignored\n";
print "Options:\n";
print " --update=<n>: update using first n columns as key (the default is to insert)\n";
print " --table=<table>: override default table name derived from file name\n";
}
our $opt_update = 0;
our $opt_table = "";
GetOptions("update=i", "table=s") or die usage();
my $csvfile = $ARGV[0];
if (!$csvfile) {
print "Error: No filename specified\n";
usage();
exit 1;
}
if ($opt_table) {
$table = $opt_table;
} else {
$table = $csvfile;
$table =~ s/\.csv$//;
}
# get the csv
my $csv = Text::CSV_XS->new();
my $f;
open ($f, "<", "$csvfile") or die "Could not open $csvfile: $!";
my $col_names = $csv->getline($f);
my @column_commented;
for (my $i=0; $i<(@$col_names); $i++) {
if ($col_names->[$i] =~ /^#/) {
$column_commented[$i] = 1;
} else {
$column_commented[$i] = 0;
}
}
# create the list of columns
my $column_list = "";
for (my $i=0; $i<(@$col_names); $i++) {
unless ($column_commented[$i]) {
comma_append(\$column_list, $col_names->[$i]);
}
}
while (my $row = $csv->getline($f)) {
my @fields = @$row;
next if ($fields[0] eq '#');
if ($opt_update) {
spewUpdate(@fields);
} else {
spewInsert(@fields);
}
}
sub spewInsert {
my @fields = @_;
my $values = "";
for (my $i=0; $i<(@$col_names); $i++) {
unless ($column_commented[$i]) {
comma_append(\$values, "'" . sql_escape($fields[$i]) . "'");
}
}
print "INSERT INTO $table ($column_list) values($values);\n";
}
sub spewUpdate {
# opt_update has the number of columns that is to be used for the primary
# key. Assumption is that all primary key columns will appear in the beginning
# (excluding commented columns)
my @fields = @_;
my $col_index = 0;
my $values = "";
for (my $i=0; $i<(@$col_names); $i++) {
unless ($column_commented[$i]) {
# exclude pkeys
unless ($col_index++ < $opt_update) {
comma_append(\$values, $col_names->[$i] . "='" . sql_escape($fields[$i]) . "'");
}
}
}
my $filter = "";
$col_index = 0;
for (my $i=0; $i<(@$col_names); $i++) {
unless ($column_commented[$i]) {
# include only pkeys
if ($col_index++ < $opt_update) {
and_append(\$filter, $col_names->[$i] . "='" . sql_escape($fields[$i]) . "'");
} else {
last;
}
}
}
print "UPDATE $table SET $values WHERE $filter;\n";
}
sub comma_append {
my ($str_ref, $to_append) = @_;
if ($$str_ref) {
$$str_ref .= ", ";
}
$$str_ref .= $to_append;
}
sub and_append {
my ($str_ref, $to_append) = @_;
if ($$str_ref) {
$$str_ref .= " AND ";
}
$$str_ref .= $to_append;
}
sub sql_escape {
$str = shift;
$str =~ s/\'/\'\'/g;
return $str;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment