timabell/tidy-insert-sql.pl

## tidy-insert-sql.pl
#!/usr/bin/perl

# https://gist.github.com/timabell/94b55a12db4c6ee42e10
# takes generated SQL for INSERTing records en masse and strips repetition of column names
# modern sql server accepts multiple rows separated by commas in one insert block

# before
# INSERT (x,y,z) VALUES (1,2,3)
# INSERT (x,y,z) VALUES (1,2,3)
# INSERT (x,y,z) VALUES (1,2,3)

# after
# INSERT (x,y,z) VALUES
#   (1,2,3)
# , (1,2,3)
# , (1,2,3)

# config

# sql server can only handle 1000 value rows at once
# also have to balance performance http://stackoverflow.com/a/8640583/10245
$batchSize=100;

# windows line endings (affects chomp)
$/ = "\r\n";

$file = $ARGV[0];
$first=1;
$count=0;
open(FILE,$file) or die "file open failed";

while(<FILE>)
{
	chomp;
	if ($_ =~ /^INSERT/)
	{
		$count++;
		if ($first || $count >= $batchSize)
		{
			$_ =~ s/VALUES/VALUES\r\n  /;
			print "$_\r\n";
			$first=0;
			$count=0;
		} else {
			$_ =~ s/INSERT.*VALUES/ ,/;
			print "$_\r\n";
		}
	} else {
		print "$_\r\n";
		$first=1;
		$count=0;
	}
}
close(FILE);
print "\r\n";