Skip to content

Instantly share code, notes, and snippets.

@Vovan-VE
Created June 22, 2021 10:58
Show Gist options
  • Save Vovan-VE/18f8cab84f699bc70bb40bd1b1b51898 to your computer and use it in GitHub Desktop.
Save Vovan-VE/18f8cab84f699bc70bb40bd1b1b51898 to your computer and use it in GitHub Desktop.
Merge series of single row INSERT from `mysqldump` into series of multi-INSERT
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Getopt::Long;
sub usage(;$) {
my ($exit) = @_;
say 'Usage';
say " $0 [--max-length=<length>]";
say '';
say 'Description';
say '';
say ' Reads input and writes to output.';
exit($exit) if defined $exit;
}
sub match($$) {
my ($str, $re) = @_;
return unless $str =~ /($re)/;
return {
str => $&,
len => length $&,
from => $-[0],
to => $+[0],
};
}
my %opt = (
'max-length' => 32768,
);
Getopt::Long::Configure("bundling");
GetOptions(\%opt,
"help|h|?",
"max-length=i",
)
or usage(2);
usage(0) if defined $opt{help};
my $re_name = qr/`[^`]++`|"[^"]++"/;
my $re_start = qr/
^ INSERT \h+ INTO \h+ $re_name \h*
(?:
\(
$re_name \h*
(?:
, \h*
$re_name \h*
)*
\) \h*
)?
VALUES \h*
/x;
my $re_end = qr/;$/;
use constant {
REPLACE_START => ',',
REPLACE_END => '',
};
my $in_first;
my $in_second;
my $out_first;
my $out_second;
my $first_begins;
my $second_begins;
my $first_ends;
my $second_ends;
my $joined_length;
binmode STDIN, ":bytes";
binmode STDOUT, ":bytes";
while (<STDIN>) {
chomp;
print $out_first if defined $out_first;
my $line = $_;
($in_first, $in_second) = ($in_second, $line);
($out_first, $out_second) = ($out_second, $line);
# skip step on first input line where no prev line is
next unless defined $in_first;
($first_begins, $second_begins) = ($second_begins, match($in_second, $re_start));
($first_ends, $second_ends) = ($second_ends, match($in_second, $re_end));
if ($first_begins && $first_ends && $second_begins) {
if ($first_begins->{str} eq $second_begins->{str}) {
my $new_join_length = $joined_length // length $out_first;
$new_join_length += length(REPLACE_END) - length($first_ends->{str});
$new_join_length += length($out_second) - length($second_begins->{str});
$new_join_length += length REPLACE_START;
if ($new_join_length < $opt{'max-length'}) {
substr($out_first, -$first_ends->{len}) = REPLACE_END;
substr($out_second, $second_begins->{from}, $second_begins->{len}) = REPLACE_START;
$joined_length = $new_join_length;
next;
}
}
}
undef $joined_length;
$out_first .= "\n";
}
print $out_first if defined $out_first;
print $out_second if defined $out_second;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment