Created
August 14, 2017 23:11
-
-
Save levonet/e3e20b19030fad80c7824e87c4902b9e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
=encoding utf8 | |
=head1 NAME | |
csv2mydb.pl — скрипт, выгружает уникальные адреса из полей Postort, Postnummer, Gatunamn, Gatnr, Ingång | |
в CSV-файле в базу данных. | |
=head1 SYNOPSIS | |
./csv2mydb.pl <csv filename> [dbhost] [dbuser] [dbpassword] [dbname] | |
=cut | |
use strict; | |
use warnings; | |
use utf8; | |
use DBI; | |
my ($csv_file, $db_host, $db_user, $db_passwd, $db_name) = @ARGV; | |
my @exp_fields = qw/Postort Postnummer Gatunamn Gatnr Ingång/; | |
my $idx_fields; | |
my $dbh; | |
my $st_drop = 0; | |
my $st_ins = 0; | |
unless (defined $csv_file) { | |
print "Usage:\n\t$0 <csv filename> [dbhost] [dbuser] [dbpassword] [dbname]\n"; | |
exit 1; | |
} | |
die "Error: file `$csv_file` not found" | |
unless -e $csv_file; | |
open(FILE, $csv_file) || die "Error: $!"; | |
while (<FILE>) { | |
chomp; | |
my @line = split(/;/); | |
unless (defined $idx_fields) { | |
$idx_fields = make_field_index(\@line, \@exp_fields); | |
next; | |
} | |
my $address = {}; | |
for (@exp_fields) { | |
$address->{$_} = $line[$idx_fields->{$_}] | |
} | |
export2db($address); | |
} | |
close FILE; | |
my_disconnect(); | |
# Статистика | |
print "Dropped: $st_drop\n"; | |
print "Inserted: $st_ins\n"; | |
exit; | |
=head1 METHODS | |
=item B<make_field_index(\@line, \@fields)> | |
Cтроит индекс экспортируемых полей из csv | |
Возвращает хеш, где по ключу хранится номер колонки | |
=cut | |
sub make_field_index { | |
my ($line, $fields) = @_; | |
my $idx = {}; | |
my $i = 0; | |
for my $field (@{$line}) { | |
for (@{$fields}) { | |
if ($_ eq $field) { | |
$idx->{$_} = $i; | |
last; | |
} | |
} | |
++$i; | |
} | |
return $idx; | |
} | |
=item B<split_field($field)> | |
Разделяет поле, если оно разбито запятой или состоит из диапазона | |
Возвращает указатель на массив | |
=cut | |
sub split_field { | |
my ($field) = @_; | |
my @result; | |
for my $listed (split(/,/, $field)) { | |
if ($listed =~ m/^([\dA-Z])-([\dA-Z])$/) { | |
for ($1..$2) { | |
push(@result, $_); | |
} | |
} else { | |
push(@result, $listed); | |
} | |
} | |
push(@result, "") if !@result; | |
return \@result; | |
} | |
=item B<export2db($address)> | |
Разворачивает номера и литеры и добавляет адрес в БД | |
=cut | |
sub export2db { | |
my ($address) = @_; | |
for my $number (@{split_field($address->{'Gatnr'})}) { | |
for my $entry (@{split_field($address->{'Ingång'})}) { | |
my_insert_uniq({ | |
City => $address->{'Postort'}, | |
Zip => $address->{'Postnummer'}, | |
Street => $address->{'Gatunamn'}, | |
Number => $number, | |
Entry => $entry | |
}); | |
} | |
} | |
} | |
=item B<export2db($address)> | |
Добавляет уникальный адрес в БД | |
=cut | |
sub my_insert_uniq { | |
my ($adr) = @_; | |
my_connect() unless defined $dbh; | |
my $sth = $dbh->prepare("SELECT AddressID FROM Address WHERE Zip=? AND City=? AND Street=? AND Number=? AND Entry=?"); | |
$sth->execute( | |
$adr->{'Zip'}, | |
$adr->{'City'}, | |
$adr->{'Street'}, | |
$adr->{'Number'}, | |
$adr->{'Entry'} | |
); | |
my ($AddressID) = $sth->fetchrow_array(); | |
$sth->finish(); | |
return $st_drop++ if defined $AddressID; | |
$dbh->do("INSERT INTO Address VALUES (null, ?, ?, ?, ?, ?)", undef, | |
$adr->{'Zip'}, | |
$adr->{'City'}, | |
$adr->{'Street'}, | |
$adr->{'Number'}, | |
$adr->{'Entry'} | |
); | |
$st_ins++; | |
} | |
=item B<my_connect()> | |
Открывает соединение с БД и при необходимости создаёт таблицу Address | |
=cut | |
sub my_connect { | |
my $dsn = "DBI:mysql:database=".(defined $db_name ? $db_name : "import_test") | |
.";host=".(defined $db_host ? $db_host : "localhost"); | |
$db_user = defined $db_user ? $db_user : "root"; | |
$db_passwd = defined $db_passwd ? $db_passwd : ""; | |
$dbh = DBI->connect($dsn, $db_user, $db_passwd, {mysql_enable_utf8 => 1}); | |
$dbh->do("CREATE TABLE IF NOT EXISTS Address (AddressID INT AUTO_INCREMENT, Zip INT, City VARCHAR(100), Street VARCHAR(100), Number VARCHAR(8), Entry VARCHAR(8), PRIMARY KEY (AddressID))"); | |
} | |
=item B<my_disconnect()> | |
Закрывеат соединение с БД | |
=cut | |
sub my_disconnect { | |
return unless defined $dbh; | |
$dbh->disconnect(); | |
$dbh = undef; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment