Skip to content

Instantly share code, notes, and snippets.

@colinmollenhour
Forked from divinity76/process-mysqldump.c
Last active July 10, 2018 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save colinmollenhour/cf23b0f7e955267ed1107c9edb07f7c2 to your computer and use it in GitHub Desktop.
Save colinmollenhour/cf23b0f7e955267ed1107c9edb07f7c2 to your computer and use it in GitHub Desktop.
Add newlines before parentheses in a SQL mysqldump
// gcc -O2 -Wall -pedantic process-mysqldump.c -o process-mysqldump
// Usage: cat dump.sql | process-mysqldump
// Or : process-mysqldump dump.sql
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#define BUFFER 100000
// Report whether the character at string[offset] is escaped, i.e. whether it
// is immediately preceded by an odd number of consecutive backslashes.
// Offset 0 has nothing before it and is therefore never escaped.
bool is_escaped(char* string, int offset) {
    bool escaped = false;

    // Walk backwards over the run of backslashes, flipping the answer for
    // each one: one backslash escapes, two cancel out, three escape, ...
    for (int i = offset; i != 0 && string[i - 1] == '\\'; i--) {
        escaped = !escaped;
    }
    return escaped;
}
// Return true when `string` begins with the exact, case-sensitive text
// "INSERT INTO " (including the trailing space).
bool is_insert(char* string) {
    static const char prefix[] = "INSERT INTO ";
    // sizeof includes the terminating NUL, so subtract one to get the
    // number of characters to compare.
    const size_t prefix_len = sizeof prefix - 1;

    if (strncmp(prefix, string, prefix_len) != 0) {
        return false;
    }
    return true;
}
// Copy a mysqldump from a file (argv[1]) or stdin to stdout, inserting a
// newline before every top-level, unquoted "(" found on "INSERT INTO " lines.
// All other lines are copied through unchanged.
//
// Input is read in BUFFER-sized chunks, so one logical line may arrive in
// several pieces; the quote/escape/parenthesis state is carried across chunks.
//
// Returns 0 on success. Exits with a non-zero status when the input file
// cannot be opened or read, or when an unquoted ")" has no matching "(".
int main(int argc, char *argv[])
{
    FILE *file = stdin;
    if (argc > 1) {
        file = fopen(argv[1], "rb");
        if (file == NULL) {
            // Previously a failed open handed NULL straight to fgets().
            perror(argv[1]);
            return EXIT_FAILURE;
        }
    }

    char buffer[BUFFER];
    int parenthesis = 0;        // nesting depth of unquoted parentheses
    bool quote = false;         // inside a single-quoted SQL string
    bool escape = false;        // previous character was an unescaped backslash
    bool at_line_start = true;  // the next chunk begins a new input line
    bool passthrough = false;   // current input line is not an INSERT statement

    while (fgets(buffer, BUFFER, file) != NULL) {
        size_t len = strlen(buffer);
        // len can be 0 if the chunk starts with a NUL byte; guard the index.
        bool ends_line = len > 0 && buffer[len - 1] == '\n';

        // Decide once per input line whether it is an INSERT; continuation
        // chunks of an over-long line inherit the decision made for its
        // first chunk instead of being re-classified or mis-processed.
        if (at_line_start) {
            passthrough = !is_insert(buffer);
        }
        at_line_start = ends_line;

        if (passthrough) {
            fputs(buffer, stdout);
            continue;
        }

        char *line = buffer;
        while (*line != '\0') {
            // While an escape is pending the very next character must be
            // examined; otherwise jump ahead to the next interesting one.
            size_t pos = escape ? 0 : strcspn(line, "()'\\");

            if (pos > 0) {
                // Emit the uninteresting text before the match.
                fwrite(line, 1, pos, stdout);
            }
            if (line[pos] == '\0') {
                // Chunk exhausted without another special character.
                break;
            }

            switch (line[pos]) {
            case '(':
                if (!quote) {
                    if (parenthesis == 0) {
                        // A new top-level group starts: put it on its own line.
                        putchar('\n');
                    }
                    parenthesis++;
                }
                escape = false;
                break;
            case ')':
                if (!quote) {
                    if (parenthesis > 0) {
                        parenthesis--;
                    } else {
                        // Unbalanced input: dump what is left of the chunk
                        // for diagnosis and give up.
                        putchar('\n');
                        fputs(line, stdout);
                        fputs("Found closing parenthesis without opening one.\n", stderr);
                        exit(1);
                    }
                }
                escape = false;
                break;
            case '\\':
                // A backslash escapes the next character unless it is itself
                // escaped.
                escape = !escape;
                break;
            case '\'':
                if (escape) {
                    escape = false;
                } else {
                    quote = !quote;
                }
                break;
            default:
                // Ordinary character following a backslash.
                escape = false;
                break;
            }

            // Emit the examined character and continue right after it so it
            // is never matched twice.
            putchar(line[pos]);
            line += pos + 1;
        }
    }

    if (ferror(file)) {
        perror(argc > 1 ? argv[1] : "stdin");
        return EXIT_FAILURE;
    }
    if (file != stdin) {
        fclose(file);
    }
    return 0;
}
<?php
// Usage: cat dump.sql | php process-mysqldump.php
//
// Read the dump from STDIN one complete line at a time. Lines that start with
// "INSERT" are reformatted by process_line(); everything else is echoed as-is.
for (;;) {
    $line = fgets_big_buffer();
    if ($line === false) {
        // End of input.
        break;
    }
    if (strncmp($line, 'INSERT', 6) !== 0) {
        echo $line;
        continue;
    }
    process_line($line);
}
/**
 * Read one complete line from STDIN, however long it is.
 *
 * The line is fetched in chunks of up to 1 MiB and the chunks are joined
 * until one ends with "\n" or the input runs out.
 *
 * @return string|false The line (including its trailing newline when the
 *                      input had one), or false if nothing could be read.
 */
function fgets_big_buffer()
{
    $chunkSize = 1 * 1024 * 1024;

    $line = fgets(STDIN, $chunkSize);
    if ($line === false) {
        return false;
    }

    // Keep appending chunks until the accumulated text ends with a newline.
    while (substr($line, -1) !== "\n") {
        $chunk = fgets(STDIN, $chunkSize);
        if ($chunk === false) {
            // Input ended without a final newline: return what we have.
            break;
        }
        $line .= $chunk;
    }

    return $line;
}
/**
 * Echo one INSERT statement with a newline inserted before every value tuple.
 *
 * Only the text after the first " VALUES " is scanned; everything up to and
 * including that marker is copied verbatim. Parentheses inside single-quoted
 * strings are ignored, and backslash escapes are honoured so that \' does not
 * end a string. A line that contains no " VALUES " marker is echoed unchanged.
 *
 * @param string $line A complete line of the dump, normally ending in "\n".
 *
 * @return void
 *
 * @throws \Exception When an unquoted "(" is found inside another tuple, or an
 *                    unquoted ")" is found without a matching "(".
 */
function process_line($line)
{
    $marker = ' VALUES ';
    $markerPos = strpos($line, $marker);
    if ($markerPos === false) {
        // strpos() returns false when the marker is absent; adding 8 to that
        // would silently start the scan at offset 8. Pass the line through.
        echo $line;
        return;
    }

    $length = strlen($line);
    $pos = $markerPos + strlen($marker);
    $ret = substr($line, 0, $pos);
    $parenthesis = false; // currently inside a value tuple
    $quote = false;       // currently inside a single-quoted string
    $escape = false;      // previous character was an unescaped backslash
    for ($i = $pos; $i < $length; ++ $i) {
        switch ($line[$i]) {
            case '(':
                if (! $quote) {
                    if ($parenthesis) {
                        throw new \Exception ( 'double open parenthesis' );
                    } else {
                        // Start of a new tuple: put it on its own line.
                        $ret .= "\n";
                        $parenthesis = true;
                    }
                }
                $escape = false;
                break;
            case ')':
                if (! $quote) {
                    if ($parenthesis) {
                        $parenthesis = false;
                    } else {
                        throw new \Exception ( 'closing parenthesis without open' );
                    }
                }
                $escape = false;
                break;
            case '\\':
                // A backslash escapes the next character unless it is itself
                // escaped.
                $escape = ! $escape;
                break;
            case "'":
                if ($escape) {
                    $escape = false;
                } else {
                    $quote = ! $quote;
                }
                break;
            default:
                $escape = false;
                break;
        }
        $ret .= $line[$i];
        // Bulk-copy the run of ordinary characters that follows, except its
        // last one: that one is left for the next loop iteration so the
        // default branch clears any pending escape.
        $to = strcspn($line, '()\\\'', $i + 1) - 1;
        if ($to > 0) {
            $ret .= substr($line, $i + 1, $to);
            $i += $to;
        }
    }
    echo $ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment