Skip to content

Instantly share code, notes, and snippets.

@levonet
Created August 14, 2017 23:11
Show Gist options
  • Save levonet/e3e20b19030fad80c7824e87c4902b9e to your computer and use it in GitHub Desktop.
Save levonet/e3e20b19030fad80c7824e87c4902b9e to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
=encoding utf8
=head1 NAME
csv2mydb.pl — скрипт, выгружает уникальные адреса из полей Postort, Postnummer, Gatunamn, Gatnr, Ingång
в CSV-файле в базу данных.
=head1 SYNOPSIS
./csv2mydb.pl <csv filename> [dbhost] [dbuser] [dbpassword] [dbname]
=cut
use strict;
use warnings;
use utf8;
use DBI;
my ($csv_file, $db_host, $db_user, $db_passwd, $db_name) = @ARGV;
my @exp_fields = qw/Postort Postnummer Gatunamn Gatnr Ingång/;
my $idx_fields;
my $dbh;
my $st_drop = 0;
my $st_ins = 0;
unless (defined $csv_file) {
print "Usage:\n\t$0 <csv filename> [dbhost] [dbuser] [dbpassword] [dbname]\n";
exit 1;
}
die "Error: file `$csv_file` not found"
unless -e $csv_file;
open(FILE, $csv_file) || die "Error: $!";
while (<FILE>) {
chomp;
my @line = split(/;/);
unless (defined $idx_fields) {
$idx_fields = make_field_index(\@line, \@exp_fields);
next;
}
my $address = {};
for (@exp_fields) {
$address->{$_} = $line[$idx_fields->{$_}]
}
export2db($address);
}
close FILE;
my_disconnect();
# Статистика
print "Dropped: $st_drop\n";
print "Inserted: $st_ins\n";
exit;
=head1 METHODS
=item B<make_field_index(\@line, \@fields)>
Cтроит индекс экспортируемых полей из csv
Возвращает хеш, где по ключу хранится номер колонки
=cut
sub make_field_index {
my ($line, $fields) = @_;
my $idx = {};
my $i = 0;
for my $field (@{$line}) {
for (@{$fields}) {
if ($_ eq $field) {
$idx->{$_} = $i;
last;
}
}
++$i;
}
return $idx;
}
=item B<split_field($field)>
Разделяет поле, если оно разбито запятой или состоит из диапазона
Возвращает указатель на массив
=cut
sub split_field {
my ($field) = @_;
my @result;
for my $listed (split(/,/, $field)) {
if ($listed =~ m/^([\dA-Z])-([\dA-Z])$/) {
for ($1..$2) {
push(@result, $_);
}
} else {
push(@result, $listed);
}
}
push(@result, "") if !@result;
return \@result;
}
=item B<export2db($address)>
Разворачивает номера и литеры и добавляет адрес в БД
=cut
sub export2db {
my ($address) = @_;
for my $number (@{split_field($address->{'Gatnr'})}) {
for my $entry (@{split_field($address->{'Ingång'})}) {
my_insert_uniq({
City => $address->{'Postort'},
Zip => $address->{'Postnummer'},
Street => $address->{'Gatunamn'},
Number => $number,
Entry => $entry
});
}
}
}
=item B<export2db($address)>
Добавляет уникальный адрес в БД
=cut
sub my_insert_uniq {
my ($adr) = @_;
my_connect() unless defined $dbh;
my $sth = $dbh->prepare("SELECT AddressID FROM Address WHERE Zip=? AND City=? AND Street=? AND Number=? AND Entry=?");
$sth->execute(
$adr->{'Zip'},
$adr->{'City'},
$adr->{'Street'},
$adr->{'Number'},
$adr->{'Entry'}
);
my ($AddressID) = $sth->fetchrow_array();
$sth->finish();
return $st_drop++ if defined $AddressID;
$dbh->do("INSERT INTO Address VALUES (null, ?, ?, ?, ?, ?)", undef,
$adr->{'Zip'},
$adr->{'City'},
$adr->{'Street'},
$adr->{'Number'},
$adr->{'Entry'}
);
$st_ins++;
}
=item B<my_connect()>
Открывает соединение с БД и при необходимости создаёт таблицу Address
=cut
sub my_connect {
my $dsn = "DBI:mysql:database=".(defined $db_name ? $db_name : "import_test")
.";host=".(defined $db_host ? $db_host : "localhost");
$db_user = defined $db_user ? $db_user : "root";
$db_passwd = defined $db_passwd ? $db_passwd : "";
$dbh = DBI->connect($dsn, $db_user, $db_passwd, {mysql_enable_utf8 => 1});
$dbh->do("CREATE TABLE IF NOT EXISTS Address (AddressID INT AUTO_INCREMENT, Zip INT, City VARCHAR(100), Street VARCHAR(100), Number VARCHAR(8), Entry VARCHAR(8), PRIMARY KEY (AddressID))");
}
=item B<my_disconnect()>
Закрывеат соединение с БД
=cut
sub my_disconnect {
return unless defined $dbh;
$dbh->disconnect();
$dbh = undef;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment