Last active
May 31, 2022 07:35
-
-
Save ignat-s/a70019295cfd136085368e512e93dcc4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* This script allows restoring DB from text dump created via pg_dump with excluding of data of specified tables. | |
* | |
* Sample command that can help you to restore some dump ignoring these 3 tables that: | |
* <code> | |
* gunzip -c orodb.sql.gz \ | |
* | php filter_pg_dump.php \ | |
* --exclude-table-data oro_email_attachment_content \ | |
* --exclude-table-data oro_audit \ | |
* --exclude-table-data oro_audit_field \ | |
* | psql -h 127.0.0.1 -p 5432 -U postgres -w -d orodb | |
* </code> | |
* | |
* Use case. | |
* | |
* Assuming DB dump file was created using pg_dump command in text format. This file has all tables and some of them are | |
* very large. You may have a situation when: | |
* - you don't need these tables. | |
* - you don't have disk space simply to remove data of these tables after restore. | |
* - you want DB restore to work faster by excluding unnecessary data. | |
* | |
* Implementation in this script excludes lines from text pg_dump file that are used to insert data in tables. | |
* Example of a COPY block of lines in the SQL dump file: | |
* <code> | |
* COPY public.oro_website (id, organization_id, business_unit_owner_id, name, created_at, updated_at, is_default, guest_role_id, default_role_id, serialized_data) FROM stdin; | |
* 1 1 1 Default 2021-03-25 05:19:28 2021-03-25 05:19:28 t 3 2 Tjs= | |
* 2 2 2 Asurity 2021-03-25 05:19:33 2021-03-25 05:19:33 f 5 4 Tjs= | |
* \. | |
* </code> | |
* | |
* These lines will be excluded from the dump in case if option "--exclude-table-data oro_website" is provided. | |
* | |
* Test commands. | |
* | |
* Sample commands to test this script: | |
* <code> | |
* # Read dump file and save filtered output that excludes data odf specified in new a dump file | |
* php filter_pg_dump.php --exclude-table-data oro_email_attachment_content < sample-dump.sql | |
* | |
* # Same case using cat | |
* cat sample-dump.sql | php filter_pg_dump.php --exclude-table-data oro_email_attachment_content | |
* </code> | |
* | |
* @author Ignat Shcheglovskyi <ignat@oroinc.com> | |
*/ | |
const COPY_START_PREFIX = 'COPY '; | |
const COPY_START_REGEX_PATTERN = '/^COPY (\w+)\.(\w+) /'; | |
const COPY_END = "\.\n"; | |
$options = getopt('', ['exclude-table-data:']); | |
if (empty($options['exclude-table-data'])) { | |
fwrite(STDERR, "Provide at least one option with value: --exclude-table-data\n"); | |
exit(1); | |
} | |
$excludeTablesData = (array)$options['exclude-table-data']; | |
$prevLine = ''; | |
$prevLineExcluded = false; | |
while (false !== ($line = fgets(STDIN))) { | |
$lineExcluded = isLineExcluded($line, $prevLine, $prevLineExcluded, $excludeTablesData); | |
if (!$lineExcluded) { | |
echo $line; | |
} | |
$prevLine = $line; | |
$prevLineExcluded = $lineExcluded; | |
} | |
function isLineExcluded(string $line, string $prevLine, bool $prevLineExcluded, array $excludeTablesData): bool | |
{ | |
if ($prevLineExcluded) { | |
if ($prevLine == COPY_END) { | |
return false; | |
} else { | |
return true; | |
} | |
} | |
if (0 !== strpos($line, COPY_START_PREFIX)) { | |
return false; | |
} | |
if (!preg_match(COPY_START_REGEX_PATTERN, $line, $matches)) { | |
// This is weird, we might have bug in this case | |
return false; | |
} | |
$table = $matches[2]; | |
if (in_array($table, $excludeTablesData)) { | |
return true; | |
} | |
return false; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment