Skip to content

Instantly share code, notes, and snippets.

@joelpittet
Last active August 10, 2016 11:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joelpittet/14c155902e6b62bc8c8e to your computer and use it in GitHub Desktop.
Save joelpittet/14c155902e6b62bc8c8e to your computer and use it in GitHub Desktop.
Drupal MySQL character set converter to utf8mb4
<?php
// Helper for https://www.drupal.org/node/2488180
// Run: drush src charset-converter.php
class CharsetConverter {
/**
* Character set.
* @var string
*/
private $charset = 'utf8';
/**
* Collation.
* @var string
*/
private $collation = 'utf8_unicode_ci';
public function __construct($charset = NULL, $collation = NULL) {
$this->charset = !$charset ?: 'utf8';
$this->collation = !$collation ?: 'utf8_unicode_ci';
}
/**
* Convert the MySQL drupal databases character set and collation.
*
* @param array $databases
* The Drupal 7 database array.
*/
public function convert(array $databases) {
foreach ($databases as $database_key => $database_values) {
foreach ($database_values as $target => $database) {
// MySQL, mysqli, mysqlnd, etc.
if (strpos($database['driver'], 'mysql') === 0) {
drush_print('Target MySQL database: ' . $database_key . ':' . $target);
// Connect to next database.
db_set_active($database_key);
// Check the database type is mysql.
$db_type = Database::getConnection()->databaseType();
// Skip if not MySQL.
if ($db_type !== 'mysql') {
continue;
}
// @todo check if large prefixes is enabled.
// Get the charset and collation we are going to change to from the config.
$this->charset = (!empty($database['charset'])) ? $database['charset'] : 'utf8';
$this->collation = (!empty($database['collation'])) ? $database['collation'] : 'utf8_unicode_ci';
// For each database:
$this->convertDatabase($database['database']);
// For each table in the database.
$this->convertTables();
// // For each column: (Doesn't seem to be needed)
// ALTER TABLE table_name CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
}
}
}
// Reset connection to default.
db_set_active();
}
/**
* @param string
* Database name.
*
* @param string $charset
* (Optional) The character set.
* @param string $collation
* (Optional) The collation.
*
* @return bool
* success|failure.
*/
public function convertDatabase($database_name, $charset = NULL, $collation = NULL) {
drush_print('Converting database: ' . $database_name);
$sql = "ALTER DATABASE " . $database_name . " CHARACTER SET = :charset COLLATE = :collation;";
return db_query($sql, array(
':charset' => $charset ?: $this->charset,
':collation' => $collation ?: $this->collation,
));
}
/**
* Converts all the tables defined by drupal_get_schema().
*
* @param string $charset
* (Optional) The character set.
* @param string $collation
* (Optional) The collation.
*
* @return bool
* success|failure.
*/
public function convertTables($charset = NULL, $collation = NULL) {
// For each table:
// Deal only with Drupal managed tables.
$schema = drupal_get_schema();
$table_names = array_keys($schema);
sort($table_names);
foreach (array_keys($schema) as $table_name) {
if (!db_table_exists($table_name)) {
continue;
}
$this->convertTable($table_name, $charset, $collation);
}
}
/**
* Converts a table to a desired character set and collation.
*
* @param string $table_name
* The database table name.
* @param string $charset
* (Optional) The character set.
* @param string $collation
* (Optional) The collation.
*
* @return bool
* success|failure.
*/
public function convertTable($table_name, $charset = NULL, $collation = NULL) {
$sql = "ALTER TABLE {" . $table_name . "} CHARACTER SET = :charset COLLATE = :collation";
drush_print('Converting table: ' . $table_name);
$result = db_query($sql, array(
':charset' => $charset ?: $this->charset,
':collation' => $collation ?: $this->collation,
));
db_query("ALTER TABLE {" . $table_name . "} ROW_FORMAT=DYNAMIC");
db_query("REPAIR TABLE {" . $table_name . "}");
db_query("OPTIMIZE TABLE {" . $table_name . "}");
$this->convertTableFields($table_name, $charset, $collation);
return $result;
}
/**
* Converts a table's field to a desired character set and collation.
*
* @param string $table_name
* The database table name.
* @param string $charset
* (Optional) The character set.
* @param string $collation
* (Optional) The collation.
*
* @return bool
* success|failure.
*/
public function convertTableFields($table_name, $charset = NULL, $collation = NULL) {
$table_name = Database::getConnection()->prefixTables('{' . db_escape_table($table_name) . '}');
$results = db_query("SHOW FULL FIELDS FROM {" . $table_name . "}")->fetchAllAssoc('Field');
$charset = $charset ?: $this->charset;
$collation = $collation ?: $this->collation;
foreach ($results as $row) {
// Skip fields that don't have collation, their probably int or something.
// or if we are using that collation for this field already save a query
// or is not binary.
if (!$row->Collation || $row->Collation === $collation || strpos($row->Collation, '_bin') !== FALSE) {
continue;
}
$sql = "ALTER TABLE {" . $table_name . "} MODIFY " . $row->Field . " " . $row->Type . " CHARACTER SET :charset COLLATE :collation";
db_query($sql, array(
':charset' => $charset,
':collation' => $collation,
));
}
}
}
$converter = new CharsetConverter();
global $databases;
$converter->convert($databases);
@stefanruijsenaars
Copy link

stefanruijsenaars commented Aug 10, 2016

This loses default value, not null and comment definitions because of the MODIFY column statement :(

See https://www.drupal.org/node/2781545

Anyone wanting this functionality, consider using beta2 or later of https://www.drupal.org/project/utf8mb4_convert instead

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment