Skip to content

Instantly share code, notes, and snippets.

@brad-jones
Last active December 31, 2020 03:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brad-jones/faa2004d72bfde6e9175 to your computer and use it in GitHub Desktop.
Save brad-jones/faa2004d72bfde6e9175 to your computer and use it in GitHub Desktop.
DbDiff
<?php
/**
* Class: DbDiff
* =============================================================================
* This class will output the SQL needed to transform db_1 into db_2 including
* DATA changes, not just schema changes!!! And it pretty fast, when run locally
* at least. I developed this to help me detect changes in CMS type databases
* like wordpress so I could create SQL migration and seeding scripts easily.
*
* Brad Jones <brad @="bjc.id.au" />
*
* Credit to Stephen Morley for the Diff Class.
*/
class DbDiff
{
private $debug = false;
private $db_1 = null;
private $db_2 = null;
private $sql_diff = '';
public function __construct()
{
// Log into both databases
$this->db_1 = new mysqli('host1', 'user1', 'pass1', 'dbname1');
$this->db_2 = new mysqli('host2', 'user2', 'pass2', 'dbname2');
// Get our tables
$table_schemas_1 = $this->getTables(1);
$table_schemas_2 = $this->getTables(2);
// Compare the schemas
if (print_r($table_schemas_1, true) != print_r($table_schemas_2, true))
{
// Flaten our 2 arrays
$schema = [];
foreach($table_schemas_1 as $table => $table_schema)
{
$schema[$table.',1'] = $table_schema;
}
foreach($table_schemas_2 as $table => $table_schema)
{
$schema[$table.',2'] = $table_schema;
}
// Lets loop through the diffrences and create the
// SQL needed to make the changes to the second db.
foreach($schema as $key => $table_schema)
{
// Grab the table name and db id
list($table, $db) = explode(',', $key);
// Basically do the opposite of one another
switch($db)
{
case 1:
if (isset($schema[$table.',2']) && $schema[$table.',2'] != false)
{
// The table exists in both dbs
$this->createTableSchemaDiff($table, $schema[$table.',1'], $schema[$table.',2']);
// We have dealt with both of these now, we can set them to false
$schema[$table.',1'] = false;
$schema[$table.',2'] = false;
}
elseif (!isset($schema[$table.',2']))
{
// We don't need this table in our diff
$this->sql_diff .= "DROP TABLE `$table`;\n\n";
}
break;
case 2:
if (isset($schema[$table.',1']) && $schema[$table.',1'] != false)
{
// The table exists in both dbs'
$this->createTableSchemaDiff($table, $schema[$table.',1'], $schema[$table.',2']);
// We have dealt with both of these now, we can set them to false
$schema[$table.',1'] = false;
$schema[$table.',2'] = false;
}
elseif (!isset($schema[$table.',1']))
{
// Add the create table sql to our diff
$this->sql_diff .= $table_schema."\n\n";
}
break;
}
}
}
else
{
foreach($table_schemas_1 as $table => $table_schema)
{
// The table exists in both dbs
$this->createTableDataDiff($table);
}
if ($this->debug)
{
echo "DATABASE SCHEMA IDENTICAL\n";
}
}
// Finally output the diff sql
echo $this->sql_diff;
}
/**
* Method: getTables
* =========================================================================
* This will query each db and make a list of table schemas.
*
* Parameters:
* -------------------------------------------------------------------------
* $db_num - Just a number to indetify which db to query.
*
* Returns:
* -------------------------------------------------------------------------
* array
*/
private function getTables($db_num)
{
// This is what we will return
$output = [];
// Grab a list of tables;
$tables = $this->{'db_'.$db_num}->query('SHOW FULL TABLES')->fetch_all();
// Loop through the list
foreach ($tables as $table)
{
// Make sure it's of type `BASE TABLE`
if ($table[1] == 'BASE TABLE')
{
// Get the SQL to create the table
$result = $this->{'db_'.$db_num}
->query('SHOW CREATE TABLE '.$table[0])
->fetch_all()
;
// Remove the AUTO_INCREMENT section
$sql = preg_replace
(
"/\s?AUTO_INCREMENT=\d+\s?/",
" ",
$result[0][1]
);
// Add it to our schema list
$output[$table[0]] = $sql;
}
}
// Return the array
return $output;
}
/**
* Method: createTableSchemaDiff
* =========================================================================
* This will create the diff sql for the given table.
*
* Parameters:
* -------------------------------------------------------------------------
* $table_name - The name of the table we are comparing.
* $table_1 - The CREATE sql for the first table.
* $table_2 - The CREATE sql for the second table.
*
* Returns:
* -------------------------------------------------------------------------
* void
*/
private function createTableSchemaDiff($table_name, $table_1, $table_2)
{
// Quickly just check if they are the same or not
if ($table_1 != $table_2)
{
// Compare the lines of the CREATE sql
$diff_result = Diff::compare($table_1, $table_2);
// Loop through the results
$shown_unsupported_diff = false;
$lines = new ArrayIterator($diff_result);
foreach ($lines as $key => $line)
{
// Clean up the sql
$sql = str_replace(',', '', trim($line[0]));
// We only care if it's diffrent
if ($line[1] > 0)
{
// Is it a standard column
if(preg_match('/^`(.*)`/', $sql, $matches))
{
// Grab its name
$column_name = $matches[1];
// Do we need to add, modify or drop the column
switch($line[1])
{
case Diff::INSERTED:
// Add the column to our sql diff
$this->sql_diff .= "ALTER TABLE $table_name ADD COLUMN $sql;\n\n";
break;
case Diff::DELETED:
// Check to see if the next diff result is for
// the same column, ie: we need to modify a column.
if (strpos($lines[$key+1][0], $column_name) !== false && $lines[$key+1][1] == Diff::INSERTED)
{
// Add the column to our sql diff
$this->sql_diff .= "ALTER TABLE $table_name MODIFY COLUMN $sql;\n\n";
// Skip the next element in the array
$lines->next();
}
else
{
// Drop the column from our sql diff
$this->sql_diff .= "ALTER TABLE $table_name DROP COLUMN `$column_name`;\n\n";
}
break;
}
}
else
{
// We only want to show this once
if (!$shown_unsupported_diff)
{
$this->sql_diff .=
"/* \n".
"================================================================================\n".
"UNSUPPORTED DIFF DETECTED IN TABLE SCHEMA \n".
"================================================================================\n".
"dbdiff.php has not yet been programmed to deal with things like Primary Keys or \n".
"table engines, encoding types, etc. So below is the CREATE sql for the table \n".
"showing the diffrences we have found. You may decide what to do from here. \n".
"\n\n".
Diff::toString($diff_result).
"\n\n".
"*/; \n"
;
$shown_unsupported_diff = true;
}
}
}
}
}
else
{
if ($this->debug)
{
echo "$table_name - SCHEMA SAME IN BOTH DBs\n";
}
}
// Now loop through the data of the table
$this->createTableDataDiff($table_name);
}
/**
* Method: createTableDataDiff
* =========================================================================
* This will create the diff sql for the data within the given table.
* Note this is entirely based on the table having a primary key.
* If it doesn't have a primary key this won't work.
*
* Parameters:
* -------------------------------------------------------------------------
* $table_name - The name of the table we are comparing.
*
* Returns:
* -------------------------------------------------------------------------
* void
*/
private function createTableDataDiff($table_name)
{
// Grab all the data from both tables
$data1 = $this->db_1->query('SELECT * FROM '.$table_name)->fetch_all(MYSQLI_ASSOC);
$data2 = $this->db_2->query('SELECT * FROM '.$table_name)->fetch_all(MYSQLI_ASSOC);
// Do a quick check to see if they are the same
if (print_r($data1, true) != print_r($data2, true))
{
// Get the primary key
// We assume the primary key is the same for both tables.
if (isset($data1[0])) $primary_key = array_keys($data1[0])[0];
elseif (isset($data2[0])) $primary_key = array_keys($data2[0])[0];
// Flaten our data results
$data = [];
foreach($data1 as $row) $data[$row[$primary_key].',1'] = $row;
foreach($data2 as $row) $data[$row[$primary_key].',2'] = $row;
// Loop through the data array
foreach($data as $key => $record)
{
// Extract the primary key id and db id
list($id, $db) = explode(',', $key);
// Basically do the opposite of one another
switch($db)
{
case 1:
if (isset($data[$id.',2']) && $data[$id.',2'] != false)
{
$this->createRowDiff($table_name, $primary_key, $id, $data[$id.',1'], $data[$id.',2']);
$data[$id.',1'] = false;
$data[$id.',2'] = false;
}
elseif (!isset($data[$id.',2']))
{
$this->sql_diff .= "DELETE FROM `$table_name` WHERE `$primary_key` = $id;\n\n";
}
break;
case 2:
if (isset($data[$id.',1']) && $data[$id.',1'] != false)
{
$this->createRowDiff($table_name, $primary_key, $id, $data[$id.',1'], $data[$id.',2']);
$data[$id.',1'] = false;
$data[$id.',2'] = false;
}
elseif (!isset($data[$id.',1']))
{
$columns = ''; $values = '';
foreach ($record as $column => $value)
{
$columns .= "'$column', ";
$values .= "'$value', ";
}
$columns = substr($columns, 0, -2);
$values = substr($values, 0, -2);
$this->sql_diff .= "INSERT INTO `$table_name` ($columns) VALUES ($values);\n\n";
}
break;
}
}
}
else
{
if ($this->debug)
{
echo "TABLE DATA SAME $table_name\n";
}
}
}
/**
* Method: createRowDiff
* =========================================================================
* Given 2 rows from a table, this will work out the diff.
* Note at this point we assume the table schema is up to date.
*
* Parameters:
* -------------------------------------------------------------------------
* $table_name - The name of the table that the rows came from
* $primary_key_name - The name of the primary key field
* $primary_key_value - The value of the primary key field
* $row_1 - The first row.
* $row_2 - The second row.
*
* Returns:
* -------------------------------------------------------------------------
* void
*/
private function createRowDiff($table_name, $primary_key_name, $primary_key_value, $row_1, $row_2)
{
// Convert the rows into a string
$row_1_string = print_r($row_1, true);
$row_2_string = print_r($row_2, true);
// Do a quick string comparison
if ($row_1_string != $row_2_string)
{
// Compare the lines of the CREATE sql
$diff_result = Diff::compare($row_1_string, $row_2_string);
// Loop through the results
$lines = new ArrayIterator($diff_result);
foreach ($lines as $key => $line)
{
// We only care about updating columns with new data
// if we try to update a column that has been dropped thats silly.
if($line[1] == Diff::INSERTED)
{
if (preg_match('/\[(.*)\] => (.*)/', trim($line[0]), $matches))
{
// Add the column to our sql diff
$this->sql_diff .= "UPDATE TABLE $table_name SET `$matches[1]` = '$matches[2]' WHERE $primary_key_name = $primary_key_value;\n\n";
}
}
}
}
else
{
if ($this->debug)
{
echo "$id - ROW IS SAME IN BOTH DBs\n";
}
}
}
}
new DbDiff();
/*
class.Diff.php
A class containing a diff implementation
Created by Stephen Morley - http://stephenmorley.org/ - and released under the
terms of the CC0 1.0 Universal legal code:
http://creativecommons.org/publicdomain/zero/1.0/legalcode
*/
// A class containing functions for computing diffs and formatting the output.
class Diff{
// define the constants
const UNMODIFIED = 0;
const DELETED = 1;
const INSERTED = 2;
/* Returns the diff for two strings. The return value is an array, each of
* whose values is an array containing two values: a line (or character, if
* $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
* line or character is in both strings), DIFF::DELETED (the line or character
* is only in the first string), and DIFF::INSERTED (the line or character is
* only in the second string). The parameters are:
*
* $string1 - the first string
* $string2 - the second string
* $compareCharacters - true to compare characters, and false to compare
* lines; this optional parameter defaults to false
*/
public static function compare(
$string1, $string2, $compareCharacters = false){
// initialise the sequences and comparison start and end positions
$start = 0;
if ($compareCharacters){
$sequence1 = $string1;
$sequence2 = $string2;
$end1 = strlen($string1) - 1;
$end2 = strlen($string2) - 1;
}else{
$sequence1 = preg_split('/\R/', $string1);
$sequence2 = preg_split('/\R/', $string2);
$end1 = count($sequence1) - 1;
$end2 = count($sequence2) - 1;
}
// skip any common prefix
while ($start <= $end1 && $start <= $end2
&& $sequence1[$start] == $sequence2[$start]){
$start ++;
}
// skip any common suffix
while ($end1 >= $start && $end2 >= $start
&& $sequence1[$end1] == $sequence2[$end2]){
$end1 --;
$end2 --;
}
// compute the table of longest common subsequence lengths
$table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);
// generate the partial diff
$partialDiff =
self::generatePartialDiff($table, $sequence1, $sequence2, $start);
// generate the full diff
$diff = array();
for ($index = 0; $index < $start; $index ++){
$diff[] = array($sequence1[$index], self::UNMODIFIED);
}
while (count($partialDiff) > 0) $diff[] = array_pop($partialDiff);
for ($index = $end1 + 1;
$index < ($compareCharacters ? strlen($sequence1) : count($sequence1));
$index ++){
$diff[] = array($sequence1[$index], self::UNMODIFIED);
}
// return the diff
return $diff;
}
/* Returns the diff for two files. The parameters are:
*
* $file1 - the path to the first file
* $file2 - the path to the second file
* $compareCharacters - true to compare characters, and false to compare
* lines; this optional parameter defaults to false
*/
public static function compareFiles(
$file1, $file2, $compareCharacters = false){
// return the diff of the files
return self::compare(
file_get_contents($file1),
file_get_contents($file2),
$compareCharacters);
}
/* Returns the table of longest common subsequence lengths for the specified
* sequences. The parameters are:
*
* $sequence1 - the first sequence
* $sequence2 - the second sequence
* $start - the starting index
* $end1 - the ending index for the first sequence
* $end2 - the ending index for the second sequence
*/
private static function computeTable(
$sequence1, $sequence2, $start, $end1, $end2){
// determine the lengths to be compared
$length1 = $end1 - $start + 1;
$length2 = $end2 - $start + 1;
// initialise the table
$table = array(array_fill(0, $length2 + 1, 0));
// loop over the rows
for ($index1 = 1; $index1 <= $length1; $index1 ++){
// create the new row
$table[$index1] = array(0);
// loop over the columns
for ($index2 = 1; $index2 <= $length2; $index2 ++){
// store the longest common subsequence length
if ($sequence1[$index1 + $start - 1]
== $sequence2[$index2 + $start - 1]){
$table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
}else{
$table[$index1][$index2] =
max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
}
}
}
// return the table
return $table;
}
/* Returns the partial diff for the specificed sequences, in reverse order.
* The parameters are:
*
* $table - the table returned by the computeTable function
* $sequence1 - the first sequence
* $sequence2 - the second sequence
* $start - the starting index
*/
private static function generatePartialDiff(
$table, $sequence1, $sequence2, $start){
// initialise the diff
$diff = array();
// initialise the indices
$index1 = count($table) - 1;
$index2 = count($table[0]) - 1;
// loop until there are no items remaining in either sequence
while ($index1 > 0 || $index2 > 0){
// check what has happened to the items at these indices
if ($index1 > 0 && $index2 > 0
&& $sequence1[$index1 + $start - 1]
== $sequence2[$index2 + $start - 1]){
// update the diff and the indices
$diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
$index1 --;
$index2 --;
}elseif ($index2 > 0
&& $table[$index1][$index2] == $table[$index1][$index2 - 1]){
// update the diff and the indices
$diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
$index2 --;
}else{
// update the diff and the indices
$diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
$index1 --;
}
}
// return the diff
return $diff;
}
/* Returns a diff as a string, where unmodified lines are prefixed by ' ',
* deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
* parameters are:
*
* $diff - the diff array
* $separator - the separator between lines; this optional parameter defaults
* to "\n"
*/
public static function toString($diff, $separator = "\n"){
// initialise the string
$string = '';
// loop over the lines in the diff
foreach ($diff as $line){
// extend the string with the line
switch ($line[1]){
case self::UNMODIFIED : $string .= ' ' . $line[0];break;
case self::DELETED : $string .= '- ' . $line[0];break;
case self::INSERTED : $string .= '+ ' . $line[0];break;
}
// extend the string with the separator
$string .= $separator;
}
// return the string
return $string;
}
/* Returns a diff as an HTML string, where unmodified lines are contained
* within 'span' elements, deletions are contained within 'del' elements, and
* insertions are contained within 'ins' elements. The parameters are:
*
* $diff - the diff array
* $separator - the separator between lines; this optional parameter defaults
* to '<br>'
*/
public static function toHTML($diff, $separator = '<br>'){
// initialise the HTML
$html = '';
// loop over the lines in the diff
foreach ($diff as $line){
// extend the HTML with the line
switch ($line[1]){
case self::UNMODIFIED : $element = 'span'; break;
case self::DELETED : $element = 'del'; break;
case self::INSERTED : $element = 'ins'; break;
}
$html .=
'<' . $element . '>'
. htmlspecialchars($line[0])
. '</' . $element . '>';
// extend the HTML with the separator
$html .= $separator;
}
// return the HTML
return $html;
}
/* Returns a diff as an HTML table. The parameters are:
*
* $diff - the diff array
* $indentation - indentation to add to every line of the generated HTML; this
* optional parameter defaults to ''
* $separator - the separator between lines; this optional parameter
* defaults to '<br>'
*/
public static function toTable($diff, $indentation = '', $separator = '<br>'){
// initialise the HTML
$html = $indentation . "<table class=\"diff\">\n";
// loop over the lines in the diff
$index = 0;
while ($index < count($diff)){
// determine the line type
switch ($diff[$index][1]){
// display the content on the left and right
case self::UNMODIFIED:
$leftCell =
self::getCellContent(
$diff, $indentation, $separator, $index, self::UNMODIFIED);
$rightCell = $leftCell;
break;
// display the deleted on the left and inserted content on the right
case self::DELETED:
$leftCell =
self::getCellContent(
$diff, $indentation, $separator, $index, self::DELETED);
$rightCell =
self::getCellContent(
$diff, $indentation, $separator, $index, self::INSERTED);
break;
// display the inserted content on the right
case self::INSERTED:
$leftCell = '';
$rightCell =
self::getCellContent(
$diff, $indentation, $separator, $index, self::INSERTED);
break;
}
// extend the HTML with the new row
$html .=
$indentation
. " <tr>\n"
. $indentation
. ' <td class="diff'
. ($leftCell == $rightCell
? 'Unmodified'
: ($leftCell == '' ? 'Blank' : 'Deleted'))
. '">'
. $leftCell
. "</td>\n"
. $indentation
. ' <td class="diff'
. ($leftCell == $rightCell
? 'Unmodified'
: ($rightCell == '' ? 'Blank' : 'Inserted'))
. '">'
. $rightCell
. "</td>\n"
. $indentation
. " </tr>\n";
}
// return the HTML
return $html . $indentation . "</table>\n";
}
/* Returns the content of the cell, for use in the toTable function. The
* parameters are:
*
* $diff - the diff array
* $indentation - indentation to add to every line of the generated HTML
* $separator - the separator between lines
* $index - the current index, passes by reference
* $type - the type of line
*/
private static function getCellContent(
$diff, $indentation, $separator, &$index, $type){
// initialise the HTML
$html = '';
// loop over the matching lines, adding them to the HTML
while ($index < count($diff) && $diff[$index][1] == $type){
$html .=
'<span>'
. htmlspecialchars($diff[$index][0])
. '</span>'
. $separator;
$index ++;
}
// return the HTML
return $html;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment