Skip to content

Instantly share code, notes, and snippets.

@stefanruijsenaars
Created June 23, 2016 16:34
Show Gist options
  • Save stefanruijsenaars/c366a3097e8863d8ac003ab0497de912 to your computer and use it in GitHub Desktop.
Save stefanruijsenaars/c366a3097e8863d8ac003ab0497de912 to your computer and use it in GitHub Desktop.
diff --git a/includes/database/mysql/database.inc b/includes/database/mysql/database.inc
index a96b053..4e59d77 100644
--- a/includes/database/mysql/database.inc
+++ b/includes/database/mysql/database.inc
@@ -28,6 +28,9 @@ class DatabaseConnection_mysql extends DatabaseConnection {
$this->connectionOptions = $connection_options;
+ // Allow the charset to be overridden to utf8mb4 in settings.php.
+ $charset = $this->utf8mb4IsEnabled() && !(isset($this->connectionOptions['_dsn_utf8_fallback']) && $this->connectionOptions['_dsn_utf8_fallback'] === TRUE) ? 'utf8mb4' : 'utf8';
+
// The DSN should use either a socket or a host/port.
if (isset($connection_options['unix_socket'])) {
$dsn = 'mysql:unix_socket=' . $connection_options['unix_socket'];
@@ -39,7 +42,7 @@ class DatabaseConnection_mysql extends DatabaseConnection {
// Character set is added to dsn to ensure PDO uses the proper character
// set when escaping. This has security implications. See
// https://www.drupal.org/node/1201452 for further discussion.
- $dsn .= ';charset=utf8';
+ $dsn .= ';charset=' . $charset;
$dsn .= ';dbname=' . $connection_options['database'];
// Allow PDO options to be overridden.
$connection_options += array(
@@ -63,10 +66,10 @@ class DatabaseConnection_mysql extends DatabaseConnection {
// certain one has been set; otherwise, MySQL defaults to 'utf8_general_ci'
// for UTF-8.
if (!empty($connection_options['collation'])) {
- $this->exec('SET NAMES utf8 COLLATE ' . $connection_options['collation']);
+ $this->exec('SET NAMES ' . $charset . ' COLLATE ' . $connection_options['collation']);
}
else {
- $this->exec('SET NAMES utf8');
+ $this->exec('SET NAMES ' . $charset);
}
// Set MySQL init_commands if not already defined. Default Drupal's MySQL
@@ -206,6 +209,48 @@ class DatabaseConnection_mysql extends DatabaseConnection {
}
}
}
+
+ /**
+  * Reports whether the connection options request the utf8mb4 charset.
+  *
+  * @return bool
+  */
+ public function utf8mb4IsEnabled() {
+   return isset($this->connectionOptions['charset']) ? 'utf8mb4' === $this->connectionOptions['charset'] : FALSE;
+ }
+
+ /**
+  * Checks whether utf8mb4 support is available on the current system.
+  *
+  * @return bool
+  *   TRUE if the client library version check and the server probe both pass.
+  */
+ public function utf8mb4IsSupported() {
+   // Ensure that the MySQL driver supports utf8mb4 encoding.
+   $version = $this->getAttribute(PDO::ATTR_CLIENT_VERSION);
+   if (FALSE !== strpos($version, 'mysqlnd')) {
+     // The mysqlnd driver supports utf8mb4 starting at version 5.0.9.
+     $version = preg_replace('/^\D+([\d.]+).*/', '$1', $version);
+     $minimum = '5.0.9';
+   }
+   else {
+     // The libmysqlclient driver supports utf8mb4 starting at version 5.5.3.
+     $minimum = '5.5.3';
+   }
+   if (version_compare($version, $minimum, '<')) {
+     return FALSE;
+   }
+   // Ensure that the MySQL server supports large prefixes and utf8mb4. Pin the
+   // engine to InnoDB so the probe is independent of the server's default.
+   try {
+     $this->query("CREATE TABLE {drupal_utf8mb4_test} (id VARCHAR(255), PRIMARY KEY(id(255))) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci ROW_FORMAT=DYNAMIC ENGINE=INNODB");
+   }
+   catch (Exception $e) {
+     return FALSE;
+   }
+   $this->query("DROP TABLE {drupal_utf8mb4_test}");
+   return TRUE;
+ }
}
diff --git a/includes/database/mysql/schema.inc b/includes/database/mysql/schema.inc
index 2a2722e..f8fea1a 100644
--- a/includes/database/mysql/schema.inc
+++ b/includes/database/mysql/schema.inc
@@ -81,7 +81,8 @@ class DatabaseSchema_mysql extends DatabaseSchema {
// Provide defaults if needed.
$table += array(
'mysql_engine' => 'InnoDB',
- 'mysql_character_set' => 'utf8',
+ // Allow the default charset to be overridden in settings.php.
+ 'mysql_character_set' => $this->connection->utf8mb4IsEnabled() ? 'utf8mb4' : 'utf8',
);
$sql = "CREATE TABLE {" . $name . "} (\n";
@@ -109,6 +110,13 @@ class DatabaseSchema_mysql extends DatabaseSchema {
$sql .= ' COLLATE ' . $info['collation'];
}
+ // The row format needs to be either DYNAMIC or COMPRESSED in order to allow
+ // for the innodb_large_prefix setting to take effect, see
+ // https://dev.mysql.com/doc/refman/5.6/en/create-table.html
+ if ($this->connection->utf8mb4IsEnabled()) {
+ $sql .= ' ROW_FORMAT=DYNAMIC';
+ }
+
// Add table comment.
if (!empty($table['description'])) {
$sql .= ' COMMENT ' . $this->prepareComment($table['description'], self::COMMENT_MAX_TABLE);
diff --git a/includes/install.inc b/includes/install.inc
index 5e1d3c6..2e9bb06 100644
--- a/includes/install.inc
+++ b/includes/install.inc
@@ -433,7 +433,35 @@ abstract class DatabaseTasks {
// This doesn't actually test the connection.
db_set_active();
// Now actually do a check.
- Database::getConnection();
+ try {
+ Database::getConnection();
+ }
+ catch (\Exception $e) {
+ // Detect utf8mb4 incompatibility.
+ // Error code for "Can't initialize character set" error.
+ if ($e->getCode() == 2019) {
+ $this->fail(t('Your MySQL server and PHP MySQL driver must support utf8mb4 character encoding. Make sure to use a database system that supports this (such as MySQL/MariaDB/Percona 5.5.3 and up), and that the utf8mb4 character set is compiled in. See the <a href="@documentation" target="_blank">MySQL documentation</a> for more information.', array('@documentation' => 'https://dev.mysql.com/doc/refman/5.0/en/cannot-initialize-character-set.html')));
+ $info = Database::getConnectionInfo();
+ $info_copy = $info;
+ // Set a flag to fall back to utf8. Note: this flag should only be
+ // used here and is for internal use only.
+ $info_copy['default']['_dsn_utf8_fallback'] = TRUE;
+ // In order to change the Database::$databaseInfo array, we need to
+ // remove the active connection, then re-add it with the new info.
+ Database::removeConnection('default');
+ Database::addConnectionInfo('default', 'default', $info_copy['default']);
+ // Connect with the new database info, using the utf8 character set so
+ // that we can run the checkEngineVersion test.
+ Database::getConnection();
+ // Revert to the old settings.
+ Database::removeConnection('default');
+ Database::addConnectionInfo('default', 'default', $info['default']);
+ }
+ else {
+ // Rethrow the exception.
+ throw $e;
+ }
+ }
$this->pass('Drupal can CONNECT to the database ok.');
}
catch (Exception $e) {
diff --git a/modules/system/system.install b/modules/system/system.install
index 323b7b3..b1282cf 100644
--- a/modules/system/system.install
+++ b/modules/system/system.install
@@ -196,6 +196,12 @@ function system_requirements($phase) {
);
}
+ // Test database-specific multi-byte UTF-8 related requirements.
+ $charset_requirements = _system_check_db_utf8mb4_requirements();
+ if (!empty($charset_requirements)) {
+ $requirements['database_charset'] = $charset_requirements;
+ }
+
// Test PHP memory_limit
$memory_limit = ini_get('memory_limit');
$requirements['php_memory_limit'] = array(
@@ -518,6 +524,56 @@ function system_requirements($phase) {
}
/**
+ * Checks whether the requirements for multi-byte UTF-8 support are met.
+ *
+ * @return array
+ * A requirements array with the result of the charset check.
+ */
+function _system_check_db_utf8mb4_requirements() {
+  global $install_state;
+  // Skip the check in the initial installer step, where the settings have not
+  // yet been verified; do this before asking for a database connection.
+  if (isset($install_state['settings_verified']) && $install_state['settings_verified'] === FALSE) {
+    return array();
+  }
+  $connection = Database::getConnection();
+  // Drivers with no special UTF-8 requirements (PostgreSQL, SQLite) lack these.
+  if (!method_exists($connection, 'utf8mb4IsEnabled') || !method_exists($connection, 'utf8mb4IsSupported')) {
+    return array();
+  }
+  $t = get_t();
+  $requirements['title'] = $t('Database multi-byte UTF-8 support');
+  $utf8mb4_enabled = $connection->utf8mb4IsEnabled();
+  $utf8mb4_supported = $connection->utf8mb4IsSupported();
+  $driver = $connection->driver();
+  if ($utf8mb4_enabled) {
+    if ($utf8mb4_supported) {
+      $requirements['value'] = $t('Enabled');
+      $requirements['description'] = $t('Multi-byte UTF-8 for @driver is enabled.', array('@driver' => $driver));
+      $requirements['severity'] = REQUIREMENT_OK;
+    }
+    else {
+      $requirements['value'] = $t('Not supported');
+      $requirements['description'] = $t('Multi-byte UTF-8 for @driver is activated in settings.php, but not supported on your system. Please turn this off in settings.php, or ensure that all requirements are met. See <a href="https://www.drupal.org/node/2754539">https://www.drupal.org/node/2754539</a> for more information.', array('@driver' => $driver));
+      $requirements['severity'] = REQUIREMENT_ERROR;
+    }
+  }
+  else {
+    if ($utf8mb4_supported) {
+      $requirements['value'] = $t('Not enabled');
+      $requirements['description'] = $t('Multi-byte UTF-8 for @driver is not enabled, but it is supported on your system. You may enable it in settings.php to store multi-byte characters such as emojis. See <a href="https://www.drupal.org/node/2754539">https://www.drupal.org/node/2754539</a> for more information.', array('@driver' => $driver));
+      $requirements['severity'] = REQUIREMENT_WARNING;
+    }
+    else {
+      $requirements['value'] = $t('Disabled');
+      $requirements['description'] = $t('Multi-byte UTF-8 for @driver is disabled.', array('@driver' => $driver));
+      $requirements['severity'] = REQUIREMENT_INFO;
+    }
+  }
+  return $requirements;
+}
+
+/**
* Implements hook_install().
*/
function system_install() {
diff --git a/sites/default/default.settings.php b/sites/default/default.settings.php
index 7e36a4a..47cde4f 100644
--- a/sites/default/default.settings.php
+++ b/sites/default/default.settings.php
@@ -126,6 +126,30 @@
* );
* @endcode
*
+ * For handling full UTF-8 in MySQL, including multi-byte characters such as
+ * emojis, Asian symbols, and mathematical symbols, you may set the collation
+ * and charset to "utf8mb4":
+ * @code
+ * $databases['default']['default']['charset'] = 'utf8mb4';
+ * $databases['default']['default']['collation'] = 'utf8mb4_general_ci';
+ * @endcode
+ * When using this setting on an existing installation, ensure that
+ * all existing tables have been converted to the utf8mb4 charset,
+ * for example by using the utf8mb4_convert contrib module, so as to prevent
+ * mixing data with different charsets.
+ * Note this should only be used when all of the following conditions are met:
+ * - In order to allow for large indexes, MySQL must be set up with the
+ * following my.cnf settings:
+ * [mysqld]
+ * innodb_large_prefix=true
+ * innodb_file_format=barracuda
+ * innodb_file_per_table=true
+ * These settings are available as of MySQL 5.5.14, and are defaults in
+ * MySQL 5.7.7 and up.
+ * - The PHP MySQL driver must support the utf8mb4 charset (libmysqlclient
+ * 5.5.3 and up, as well as mysqlnd 5.0.9 and up).
+ * - The MySQL server must support the utf8mb4 charset (5.5.3 and up).
+ *
* You can optionally set prefixes for some or all database table names
* by using the 'prefix' setting. If a prefix is specified, the table
* name will be prepended with its value. Be sure to use valid database
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment