Skip to content

Instantly share code, notes, and snippets.

@Vovan-VE
Created June 22, 2021 09:11
Show Gist options
  • Save Vovan-VE/c6f6d2c44ae6d3252e59163445f17d8b to your computer and use it in GitHub Desktop.
Save Vovan-VE/c6f6d2c44ae6d3252e59163445f17d8b to your computer and use it in GitHub Desktop.
mysqldump to pg_dump

dump-my2pg

Read data from MySQL like mysqldump does, but output data to STDOUT like pg_dump does.

Notice: Only data will be dumped with COPY query. Nothing else.

SYNOPSIS

dump-my2pg [options] DBNAME [TABLE...]

Example:

PASSWORD=12345 dump-my2pg -uroot -p --bool='TABLE1:COL2,COL5;TABLE7:COL3' my-db-name

DEPENDENCIES

  • Perl DBD MySQL module.
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use 5.010;
use encoding 'utf8';
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";
use Getopt::Long;
use DBI;
sub usage(;$) {
my ($exit) = @_;
say 'Usage';
say " $0 [options] DBNAME [TABLE...]";
say '';
say 'Description';
say '';
say ' Dump MySQL database data to STDOUT with `pg_dump` format.';
say '';
say 'Options';
say '';
say ' --bool=REFS';
say ' Define columns for boolean typecast. REFS is list of REF separated';
say ' by ; . REF is TABLE:COLUMNS and COLUMNS is list of columns names';
say ' separated by , .';
say ' Example:';
say ' --bool="article:is_deleted,is_published;category:is_deleted"';
say '';
say ' -H, --host=HOST';
say ' MySQL host name to connect to. Defaults to localhost.';
say '';
say ' -p, --password';
say ' Whether to use password to connect to MySQL. If this flag is set, then';
say ' the password must be set in PASSWORD env variable.';
say ' Default is false, which can be forced by --no-password.';
say '';
say ' -P, --port=PORT';
say ' Port to connect to MySQL. Default is 3306.';
say '';
say ' -u, --user=USER';
say ' MySQL username to connect to MySQL. Default is root.';
say '';
exit($exit) if defined $exit;
}
my %opt = (
user => 'root',
host => 'localhost',
port => 3306,
);
Getopt::Long::Configure("bundling");
GetOptions(\%opt,
"help|h|?",
"user|u=s",
"password|p!",
"host|H=s",
"port|P=i",
"bool=s",
)
or usage(2);
usage(0) if defined $opt{help};
usage(1) unless @ARGV;
my ($dbname, @tables) = @ARGV;
my %bool;
if (defined $opt{bool}) {
for my $item (split ';', $opt{bool}) {
my ($table, $cols) = split ':', $item;
my $t = $bool{$table} //= {};
for my $col (split ',', $cols) {
$t->{$col} = 1;
}
}
}
# ==== CONNECT ====================================
print "";
my $dbh = DBI->connect(
"DBI:mysql:database=$dbname;host=$opt{host};port=$opt{port}",
$opt{user},
$opt{password} ? $ENV{PASSWORD} // '' : undef,
{
RaiseError => 1,
mysql_enable_utf8 => 1,
mysql_enable_utf8mb4 => 1,
}
) or die "E! Cannot connect: $DBI::errstr";
$dbh->{mysql_use_result} = 1;
eval {
# DBI 1.627 + Perl 5.16.3 on CentOS 7 was buggy with charset
$dbh->do(q{
SET @@sql_mode = '',
@@time_zone = '+00:00',
@@net_read_timeout = 600,
@@character_set_database = 'utf8mb4',
@@character_set_server = 'utf8mb4'
});
$dbh->do('SET NAMES utf8mb4');
};
die "E! Cannot initialize connection settings: $@\n" if $@;
# ==== TABLES LIST ====================================
{
my %unknown = map { $_ => 1 } @tables;
@tables = keys %unknown;
my @known;
my $st = $dbh->prepare('SHOW TABLES') or die "E! Cannot prepare query: $dbh->errstr";
$st->execute() or die "E! Cannot exec query: $st->errstr";
while (my ($table) = $st->fetchrow_array()) {
delete $unknown{$table};
push @known, $table;
}
die "E! Cannot fetch: $st->errstr" if $st->err;
$st->finish;
die "E! No such tables: " . join(", ", keys %unknown) if %unknown;
@tables = @known unless @tables;
}
@tables = sort { $a cmp $b } @tables;
# ==== FETCH & DUMP ====================================
sub fetch_pk($) {
my ($table) = @_;
my @pk;
my $st = $dbh->prepare(q{
SELECT
`kcu`.`COLUMN_NAME` AS `column_name`
FROM
`information_schema`.`KEY_COLUMN_USAGE` AS `kcu`,
`information_schema`.`TABLE_CONSTRAINTS` AS `tc`
WHERE
`kcu`.`TABLE_SCHEMA` = DATABASE()
AND `tc`.`TABLE_SCHEMA` = `kcu`.`TABLE_SCHEMA`
AND `kcu`.`TABLE_NAME` = ?
AND `tc`.`TABLE_NAME` = `kcu`.`TABLE_NAME`
AND `tc`.`CONSTRAINT_NAME` = `kcu`.`CONSTRAINT_NAME`
AND `tc`.`CONSTRAINT_TYPE` = 'PRIMARY KEY'
ORDER BY `kcu`.`ORDINAL_POSITION` ASC
}) or die "E! Cannot prepare query: $dbh->errstr";
$st->execute($table) or die "E! Cannot exec query: $st->errstr";
while (my ($col) = $st->fetchrow_array()) {
push @pk, $col;
}
die "E! Cannot fetch: $st->errstr" if $st->err;
$st->finish;
return @pk;
}
sub fetch_cols_str($) {
my ($table) = @_;
my @cols;
my $st = $dbh->prepare(q{
SELECT `COLUMN_NAME`
FROM `information_schema`.`COLUMNS`
WHERE
`TABLE_SCHEMA` = DATABASE()
AND `TABLE_NAME` = ?
AND `DATA_TYPE` IN (
'char', 'varchar',
'binary', 'varbinary',
'text', 'tinytext', 'mediumtext', 'longtext',
'blob', 'timyblob', 'mediumblob', 'longblob',
'enum', 'set', 'json'
)
ORDER BY `ORDINAL_POSITION` ASC
}) or die "E! Cannot prepare query: $dbh->errstr";
$st->execute($table) or die "E! Cannot exec query: $st->errstr";
while (my ($col) = $st->fetchrow_array()) {
push @cols, $col;
}
die "E! Cannot fetch: $st->errstr" if $st->err;
$st->finish;
return @cols;
}
sub q_pg_name($) {
my ($name) = @_;
$name =~ s/"/""/g;
return '"' . $name . '"';
}
sub build_pg_copy_start($@) {
my ($table, @cols) = @_;
return 'COPY public.' . q_pg_name($table) . ' (' . join(',', map { q_pg_name($_) } @cols) . ") FROM STDIN;\n";
}
my %STR_ESCAPE = (
"\\" => '\\\\',
"\n" => '\\n',
"\r" => '\\r',
"\t" => '\\t',
);
sub build_pg_str($) {
my ($s) = @_;
if (defined $s) {
$s =~ s/[\n\r\t\\]/$STR_ESCAPE{$&}/ge;
}
return $s;
}
sub build_pg_bool($) {
my ($s) = @_;
return undef unless defined $s;
return 't' if $s;
return 'f';
}
$SIG{PIPE} = sub {
exit 0x8D;
};
for my $table (@tables) {
warn "Table: $table\n";
my @pk = fetch_pk($table);
my @str_cols = fetch_cols_str($table);
my $sql = "SELECT * FROM " . $dbh->quote_identifier($table);
if (@pk) {
$sql .= " ORDER BY " . join(", ", map { $dbh->quote_identifier($_) . " ASC" } @pk);
}
my $st = $dbh->prepare($sql) or die "E! Cannot prepare query: $dbh->errstr";
$st->execute() or die "E! Cannot exec query: $st->errstr";
my @cols = @{$st->{NAME}};
my %col_index;
{
my $i = 0;
for my $col (@cols) {
$col_index{$col} = $i++;
}
}
my @str_col_index = map { $col_index{$_} } @str_cols;
my @bool_col_index = grep defined, map { $col_index{$_} } keys %{ $bool{$table} // {} };
print build_pg_copy_start($table, @cols);
while (my $row = $st->fetchrow_arrayref()) {
$row->[$_] = build_pg_str($row->[$_]) for @str_col_index;
$row->[$_] = build_pg_bool($row->[$_]) for @bool_col_index;
my $next = 0;
for my $v (@{$row}) {
if ($next) {
print "\t";
} else {
$next = 1;
}
print $v // '\\N';
}
print "\n";
}
die "E! Cannot fetch: $st->errstr" if $st->err;
$st->finish;
print "\\.\n";
print "\n";
}
$dbh->disconnect();
warn "i: +Done.\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment