Skip to content

Instantly share code, notes, and snippets.

@coolamit
Last active January 4, 2019 00:29
Show Gist options
  • Save coolamit/bccf3b942f1c4fc5c97d00a7b6efb047 to your computer and use it in GitHub Desktop.
Save coolamit/bccf3b942f1c4fc5c97d00a7b6efb047 to your computer and use it in GitHub Desktop.
PHP script to convert data from latin1 to utf8. The script expects table columns to already be on utf8 charset and 'utf8_unicode_ci' collation. It just re-encodes data from latin1 to utf8 so that the data is legible instead of appearing as gibberish.
<?php
/**
* PHP cli script to re-encode data in a MySQL database from latin1 to utf8.
* The script expects table columns to already be on utf8 charset and 'utf8_unicode_ci' collation.
* It just re-encodes data from latin1 to utf8 so that the data is legible instead of appearing as gibberish.
*
* @author Amit Gupta <http://amitgupta.in/>
* @copyright 2019 Amit Gupta <http://amitgupta.in/>
* @licence GPL v3
*/
class DB_Unicode_Conversion {
private $_creds = [
's' => 'localhost',
'd' => '',
'u' => '',
'p' => '',
];
protected $_connection;
protected $_schema = [];
const CHARSET = 'utf8';
const COLLATION = 'utf8_unicode_ci';
public function __construct() {
$this->_make_connection();
}
public function __destruct() {
$this->_sever_connection();
}
protected function _make_connection() : void {
$this->_connection = new mysqli(
$this->_creds['s'],
$this->_creds['u'],
$this->_creds['p'],
$this->_creds['d']
);
if ( $this->_connection->connect_error ) {
$this->_output( sprintf( 'Connection failed: %s', $this->_connection->connect_error ) );
die;
}
$this->_output( sprintf( 'Connection opened to DB %s', $this->_creds['d'] ), 2 );
}
protected function _sever_connection() : void {
$this->_connection->close();
$this->_output();
$this->_output( sprintf( 'Connection closed to DB %s', $this->_creds['d'] ), 2 );
}
protected function _output( string $text = '', int $number_of_breaks = 1 ) : void {
echo $text;
for ( $i = 0; $i < $number_of_breaks; $i++ ) {
echo "\n";
}
}
public function execute_plan() : void {
$tables = $this->_get_tables();
$this->_gather_schema( $tables );
$this->_convert_data();
}
protected function _get_tables() : array {
$tables = [];
$sql = 'SHOW TABLES';
$result = $this->_connection->query( $sql );
if ( $result->num_rows > 0 ) {
$this->_output( sprintf( '%d tables found', $result->num_rows ) );
while ( $row = $result->fetch_row() ) {
$tables[] = $row[0];
}
}
return $tables;
}
protected function _gather_schema( array $tables = [] ) : void {
if ( empty( $tables ) ) {
$this->_output( sprintf( 'No tables found in DB %s', $this->_creds['d'] ) );
return;
}
$sql_raw = "SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_SET_NAME, COLLATION_NAME, COLUMN_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s';";
$this->_output( sprintf( 'Generating schema for %d tables', count( $tables ) ) );
for ( $i = 0; $i < count( $tables ); $i++ ) {
$sql = sprintf( $sql_raw, $this->_creds['d'], $tables[ $i ] );
$result = $this->_connection->query( $sql );
if ( $result->num_rows > 0 ) {
$columns = [];
while ( $row = $result->fetch_assoc() ) {
if (
self::CHARSET === strtolower( $row['CHARACTER_SET_NAME'] )
&& self::COLLATION === strtolower( $row['COLLATION_NAME'] )
) {
$columns[] = $row['COLUMN_NAME'];
}
} //end while
if ( ! empty( $columns ) ) {
$this->_schema[ $tables[ $i ] ] = $columns;
}
unset( $columns );
}
} //end for
}
protected function _convert_data() : void {
if ( empty( $this->_schema ) || ! is_array( $this->_schema ) ) {
$this->_output( sprintf( 'No schema generated for conversion in DB %s', $this->_creds['d'] ) );
return;
}
$counter = 1;
$this->_output( sprintf( 'Running conversion on %d tables', count( $this->_schema ) ), 2 );
foreach ( $this->_schema as $table => $columns ) {
$sql_cols = [];
for ( $i = 0; $i < count( $columns ); $i++ ) {
$sql_cols[] = sprintf( '%1$s=CONVERT(CAST(CONVERT(%1$s USING latin1) AS BINARY) USING utf8)', $columns[ $i ] );
} //end for
$sql = sprintf( 'UPDATE %s SET %s;', $table, implode( ', ', $sql_cols ) );
$result = $this->_connection->query( $sql );
if ( true === $result ) {
$this->_output( sprintf( '%d. %s update successful', $counter, $table ) );
} else {
$this->_output( sprintf( '%d. %s update FAILED', $counter, $table ) );
}
$counter++;
} //end foreach
}
} //end of class
$o_converter = new DB_Unicode_Conversion();
$o_converter->execute_plan();
//EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment