Skip to content

Instantly share code, notes, and snippets.

@davorg
Created March 9, 2012 16:43
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davorg/2007422 to your computer and use it in GitHub Desktop.
Save davorg/2007422 to your computer and use it in GitHub Desktop.
Create MySQL Table Definition and Insert Statements From CSV
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Text::ParseWords;
use List::Util 'max';
my $file = shift || die "Need a csv file to process\n";
my $table_name = 'Addresses';
my $engine = 'InnoDB';
my $charset = 'latin1';
# Read and process header line
open my $data, '<', $file or die "Cannot open $file for reading\n";
my $header = <$data>;
chomp $header;
# @cols will be an array of hashes. Each has will contain details
# of one of the columns
my @cols = map { { name => $_ } } parse_line ',', 0, $header;
for (@cols) {
$_->{name} =~ s/\s+/_/g;
$_->{name} =~ s/'/\\'/g;
}
# Read the rest of the data
while (<$data>) {
chomp;
my @row = parse_line ',', 0, $_ ;
for my $v (@row) {
s/'/\\'/g;
}
foreach my $col_no (0 .. $#cols) {
push @{$cols[$col_no]{values}}, $row[$col_no];
}
}
# Analyse the data
foreach (@cols) {
guess_type($_);
}
# Output the table
open my $table, '>', 'mysql_create_table2.sql' or
die "Can't open file for table: $!";
print $table table_def(@cols);
# Output the data
open my $values, '>', 'mysql_data_values2.sql' or
die "Can't open file for values: $!";
for my $row (0 .. $#{$cols[0]{values}}) {
print $values insert($row, @cols);
}
die Dumper \@cols;
# Analyse an array of column hashes and fill in various information
# about the columns by looking at the data values in each column.
sub guess_type {
my $column = shift;
$column->{type} = 'varchar';
foreach my $val (@{$column->{values}}) {
if ($val !~ /^-?\d+(\.\d+)?$/) {
$column->{type} = 'varchar';
last;
}
if ($val =~ /^-?\d+\.\d+$/) {
$column->{type} = 'decimal';
} else {
$column->{type} = 'int';
}
}
if ($column->{type} eq 'decimal') {
$column->{dec1} = max map { length +(split /\./)[0] } @{$column->{values}};
$column->{dec2} = max map { length +(split /\./)[1] } @{$column->{values}};
} else {
$column->{length} = max map { length $_ } @{$column->{values}};
}
}
# Return a table definition string given an array of column hashes.
sub table_def {
my @columns = @_;
return "\n\nCREATE TABLE `$table_name` (\n" .
join(",\n", map { column_def($_) } @columns) .
"\n) ENGINE=$engine DEFAULT CHARSET=$charset\n" .
"\n\n";
}
# Given a column hash, return a string containing the SQL column definition.
sub column_def {
my $column = shift;
my $def = " `$column->{name}` $column->{type} ";
if ($column->{type} eq 'decimal') {
my $dec_length = $column->{dec1} + $column->{dec2};
$def .= "($dec_length,$column->{dec2})";
} else {
$def .= "($column->{length})";
}
return $def;
}
# Given a row number and an array of column hashes, return a string
# containing an SQL insert statement for the given row.
sub insert {
my ($row, @columns) = @_;
return "insert into $table_name (" .
join(', ', map { $_->{name} } @columns) .
") \nvalues (" .
join(', ', map { "'$_->{values}[$row]'" } @cols) .
");\n";
}
@davorg
Copy link
Author

davorg commented Mar 9, 2012

See http://scriptingmysql.wordpress.com/2011/09/02/convert-csv-file-to-mysql-database-via-perl/ for the description of the problem I'm trying to solve and more discussion.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment