public
Last active

Create MySQL Table Definition and Insert Statements From CSV

  • Download Gist
mysql-import
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
#!/usr/bin/perl
 
use strict;
use warnings;
 
use Data::Dumper;
use Text::ParseWords;
use List::Util 'max';
 
my $file = shift || die "Need a csv file to process\n";
 
my $table_name = 'Addresses';
my $engine = 'InnoDB';
my $charset = 'latin1';
 
# Read and process header line
open my $data, '<', $file or die "Cannot open $file for reading\n";
 
my $header = <$data>;
chomp $header;
 
# @cols will be an array of hashes. Each has will contain details
# of one of the columns
my @cols = map { { name => $_ } } parse_line ',', 0, $header;
for (@cols) {
$_->{name} =~ s/\s+/_/g;
$_->{name} =~ s/'/\\'/g;
}
 
# Read the rest of the data
while (<$data>) {
chomp;
my @row = parse_line ',', 0, $_ ;
for my $v (@row) {
s/'/\\'/g;
}
 
foreach my $col_no (0 .. $#cols) {
push @{$cols[$col_no]{values}}, $row[$col_no];
}
}
 
# Analyse the data
foreach (@cols) {
guess_type($_);
}
 
# Output the table
open my $table, '>', 'mysql_create_table2.sql' or
die "Can't open file for table: $!";
 
print $table table_def(@cols);
 
# Output the data
open my $values, '>', 'mysql_data_values2.sql' or
die "Can't open file for values: $!";
 
for my $row (0 .. $#{$cols[0]{values}}) {
print $values insert($row, @cols);
}
 
die Dumper \@cols;
 
# Analyse an array of column hashes and fill in various information
# about the columns by looking at the data values in each column.
sub guess_type {
my $column = shift;
 
$column->{type} = 'varchar';
 
foreach my $val (@{$column->{values}}) {
if ($val !~ /^-?\d+(\.\d+)?$/) {
$column->{type} = 'varchar';
last;
}
 
if ($val =~ /^-?\d+\.\d+$/) {
$column->{type} = 'decimal';
} else {
$column->{type} = 'int';
}
}
 
if ($column->{type} eq 'decimal') {
$column->{dec1} = max map { length +(split /\./)[0] } @{$column->{values}};
$column->{dec2} = max map { length +(split /\./)[1] } @{$column->{values}};
} else {
$column->{length} = max map { length $_ } @{$column->{values}};
}
}
 
# Return a table definition string given an array of column hashes.
sub table_def {
my @columns = @_;
 
return "\n\nCREATE TABLE `$table_name` (\n" .
join(",\n", map { column_def($_) } @columns) .
"\n) ENGINE=$engine DEFAULT CHARSET=$charset\n" .
"\n\n";
}
 
# Given a column hash, return a string containing the SQL column definition.
sub column_def {
my $column = shift;
 
my $def = " `$column->{name}` $column->{type} ";
 
if ($column->{type} eq 'decimal') {
my $dec_length = $column->{dec1} + $column->{dec2};
$def .= "($dec_length,$column->{dec2})";
} else {
$def .= "($column->{length})";
}
 
return $def;
}
 
# Given a row number and an array of column hashes, return a string
# containing an SQL insert statement for the given row.
sub insert {
my ($row, @columns) = @_;
 
return "insert into $table_name (" .
join(', ', map { $_->{name} } @columns) .
") \nvalues (" .
join(', ', map { "'$_->{values}[$row]'" } @cols) .
");\n";
}

See http://scriptingmysql.wordpress.com/2011/09/02/convert-csv-file-to-mysql-database-via-perl/ for the description of the problem I'm trying to solve and more discussion.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.