Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save goodwoor/9bf28170f20cab188d80591a50f87644 to your computer and use it in GitHub Desktop.
Save goodwoor/9bf28170f20cab188d80591a50f87644 to your computer and use it in GitHub Desktop.
diff --git a/var/tools/russian_symbols_checker/readme.md b/var/tools/russian_symbols_checker/readme.md
new file mode 100644
index 000000000..88244bc6a
--- /dev/null
+++ b/var/tools/russian_symbols_checker/readme.md
@@ -0,0 +1,42 @@
+# Russian symbols checker
+
+## Назначение
+
+Поиск русских символов в международном контенте.
+
+## Подготовка к запуску
+
+Для запуска скрипта необходимо локально развернуть дампы демок. Сам скрипт (файл run.php) разместить в корне архива демки.
+
+> Следующие действия необходимо сделать отдельно для каждой демки:
+> - скачать архив демки
+> - создать БД: `CREATE DATABASE [db_name]`
+> - в скачанном архиве найти дамп базы (обычно называется dump.sql), развернуть его в созданной БД
+> - в докере можно сделать с помощью команды внутри контейнера:
+> ```
+> mysql -h[host] -u[user] -p[password] [db_name] < path/to/dump_file/dump.sql
+> ```
+> Подробнее - пункт 1.4 из статьи - https://t.cs-cart.com/youtrack/articles/1-A-229/Lokalnaya-ustanovka-helpdesk-s-pomoshchyu-docker
+
+Скрипт запускается последовательно для каждой из демок, в имени исходной БД указывается имя развёрнутой базы демки.
+Если работаете с докером, то запускать скрипт нужно из контейнера и архив с демкой должен лежать внутри докер-окружения
+
+При необходимости детализации данных лучше использовать дебаг.
+Массив $selected_values содержит в себе всю информацию о некорректных значениях.
+
+## Запуск из консоли
+
+```
+php /path/to/helpdesk/var/tools/russian_symbols_checker/run.php
+ --host=... - хост исходной БД
+ --user=... - пользователь исходной БД
+ --password=... - пароль пользователя исходной БД
+ --db_name=... - имя исходной БД
+ --output=file - вывод в файл [db_name]_info.txt, необязательная опция, если не указать - вывод в консоль
+```
+
+Пример:
+```
+php run.php --host=mysql5.7. --user=root --password=root --db_name=apparel_db --output=file
+```
diff --git a/var/tools/russian_symbols_checker/run.php b/var/tools/russian_symbols_checker/run.php
new file mode 100644
index 000000000..2cb1406cd
--- /dev/null
+++ b/var/tools/russian_symbols_checker/run.php
@@ -0,0 +1,376 @@
+<?php
+
+if (php_sapi_name() != 'cli') {
+ die('Access denied');
+}
+ini_set('memory_limit', '1000M');
+
+$option_names = [
+ 'host:',
+ 'user:',
+ 'password:',
+ 'db_name:',
+ 'output::'
+];
+$options = getopt('', $option_names);
+
+$db = new mysqli($options['host'], $options['user'], $options['password'], $options['db_name']);
+$db->set_charset('utf8mb4');
+
+if ($db->connect_errno) {
+ echo $db->connect_error . PHP_EOL;
+ exit();
+}
+
+$table_columns = fn_symbols_checker_get_tables_info($db, $options);
+$selected_values = fn_symbols_checker_search_russian_symbols($db, $table_columns);
+
+$statistic = fn_symbols_checker_get_statistic_info($selected_values);
+if (isset($options['output']) && ($options['output'] === 'file')) {
+ fn_symbols_checker_put_result_in_file($selected_values, $options, $statistic);
+} else {
+ fn_symbols_checker_show_statistic_in_console($selected_values, $options, $statistic);
+}
+
+/**
+ * Execute tables, columns and primary keys
+ *
+ * @param mysqli $db Db connection
+ * @param array $options Options from console
+ *
+ * @return array Array of columns, primary keys for each table in database
+ */
+function fn_symbols_checker_get_tables_info($db, $options)
+{
+ $tables_query = 'show tables';
+ $tables = $db->query($tables_query);
+
+ if (!($tables->num_rows > 0)) {
+ return [];
+ }
+
+ $tables_list = $tables->fetch_all();
+
+ $table_columns = [];
+ foreach ($tables_list as $table_name) {
+ $table_name = array_shift($table_name);
+ $columns_info = $db->query('SHOW COLUMNS FROM ' . $table_name);
+ if (empty($columns_info)) {
+ continue;
+ }
+
+ $columns_info = $columns_info->fetch_all();
+ foreach ($columns_info as $column) {
+ $table_columns[$table_name]['columns'][] = array_shift($column);
+ }
+
+ $primary_keys_info = $db->query(
+ "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{$options['db_name']}' AND TABLE_NAME = '{$table_name}' AND COLUMN_KEY = 'PRI';"
+ );
+
+ if (empty($primary_keys_info)) {
+ continue;
+ }
+
+ $query_result = $primary_keys_info->fetch_all();
+ $primary_keys = $query_result;
+ if ($primary_keys === null) {
+ $table_columns[$table_name]['primary_keys'] = null;
+ continue;
+ }
+
+ foreach ($primary_keys as $p_key_array) {
+ $p_key = array_shift($p_key_array);
+ $table_columns[$table_name]['primary_keys'][] = $p_key;
+ }
+ }
+
+ foreach ($table_columns as $table_name => $table_info) {
+ foreach ($table_info['columns'] as $column_name) {
+ if ($column_name === 'lang_code' && $table_name !== 'cscart_companies') {
+ // flag for tables with 'lang_code' field
+ $table_columns[$table_name]['is_lang_table'] = 'Y';
+ // lang_code = 'ru', lang_code = 'uk' don't need to search
+ $table_columns[$table_name]['additional_condition'] = " AND lang_code != 'ru' AND lang_code != 'RU' AND lang_code != 'uk' AND lang_code != 'UK'";
+ break;
+ }
+ }
+ }
+
+ return $table_columns;
+}
+
+/**
+ * Prepare and execute search queries
+ *
+ * @param mysqli $db Db connection
+ * @param array $table_columns Columns, primary keys for each table in database
+ *
+ * @return array Array of founded russian values
+ */
+function fn_symbols_checker_search_russian_symbols($db, $table_columns)
+{
+ // russian symbols 'с' and 'С' was deleted
+ $russian_symbols = [
+ 'А', 'а', 'Б', 'б', 'В', 'в', 'Г', 'г', 'Д', 'д', 'Е', 'е', 'Ё', 'ё', 'Ж', 'ж', 'З', 'з',
+ 'И', 'и', 'Й', 'й', 'К', 'к', 'Л', 'л', 'М', 'м', 'Н', 'н', 'О', 'о', 'П', 'п', 'Р', 'р',
+ 'Т', 'т', 'У', 'у', 'Ф', 'ф', 'Х', 'х', 'Ц', 'ц', 'Ч', 'ч', 'Ш', 'ш', 'Щ', 'щ', 'Ъ', 'ъ',
+ 'Ы', 'ы', 'Ь', 'ь', 'Э', 'э', 'Ю', 'ю', 'Я', 'я'
+ ];
+ $selected_values = [];
+
+ foreach ($table_columns as $table_name => $table_info) {
+ if (count($table_info['primary_keys']) > 1) {
+ $primary_keys = implode(', ', $table_info['primary_keys']);
+ } elseif (count($table_info['primary_keys']) === 1) {
+ $primary_keys = array_shift($table_info['primary_keys']);
+ } else {
+ $primary_keys = '';
+ }
+
+ foreach ($table_info['columns'] as $column_name) {
+ if (!empty($primary_keys)) {
+ $format_select_query = 'SELECT %s, %s FROM %s WHERE ';
+ $select_query = sprintf(
+ $format_select_query,
+ $primary_keys,
+ $column_name,
+ $table_name
+ );
+ } else {
+ $format_select_query = 'SELECT %s FROM %s WHERE ';
+ $select_query = sprintf(
+ $format_select_query,
+ $column_name,
+ $table_name
+ );
+ }
+
+ foreach ($russian_symbols as $symbol) {
+ if ($symbol === 'А') {
+ // first condition without 'OR' and with '('
+ $select_query = $select_query . "({$column_name} LIKE '%{$symbol}%'";
+ continue;
+ }
+ if ($symbol === 'я') {
+ // last condition with ')'
+ $select_query = $select_query . " OR {$column_name} LIKE '%{$symbol}%')";
+ continue;
+ }
+ $select_query = $select_query . " OR {$column_name} LIKE '%{$symbol}%'";
+ }
+
+ if (!empty($table_info['additional_condition'])) {
+ $select_query = $select_query . $table_info['additional_condition'];
+ }
+
+ // very long queries
+ $query_result = $db->query($select_query);
+ if ($query_result->num_rows === 0 || !$query_result) {
+ continue;
+ }
+
+ $selected_values[$table_name]['is_lang_table'] = ($table_info['is_lang_table'] === 'Y') ? 'Y' : 'N';
+
+ while ($row = $query_result->fetch_assoc()) {
+ $info = [
+ 'incorrect_value' => array_pop($row),
+ 'primary_values' => $row,
+ ];
+
+ if ($table_info['is_lang_table'] === 'Y') {
+ $info['is_en_value'] = ($row['lang_code'] === 'EN' || $row['lang_code'] === 'en') ? 'Y' : 'N';
+ }
+
+ $selected_values[$table_name]['columns'][$column_name][] = $info;
+ }
+ }
+ }
+ return $selected_values;
+}
+
+/**
+ * Execute statistic from result
+ *
+ * @param array $selected_values Array of founded russian values
+ *
+ * @return array Statistic info
+ */
+function fn_symbols_checker_get_statistic_info($selected_values)
+{
+ $values_in_column_count = [];
+ $tables_count = 0;
+ $total_count = 0;
+
+ foreach ($selected_values as $table_name => $table_info) {
+ $tables_count += 1;
+ foreach ($table_info['columns'] as $column_name => $values) {
+ $values_in_column_count[$table_name][$column_name] += count($values);
+ $total_count += count($values);
+ }
+ }
+
+ return [
+ 'tables_count' => $tables_count,
+ 'total_count' => $total_count,
+ 'values_in_column_count' => $values_in_column_count
+ ];
+}
+
+/**
+ * Put result in txt file ([db_name]_info.txt)
+ *
+ * @param array $selected_values Array of founded russian values
+ * @param array $options Options from console
+ * @param array $statistic Statistic info
+ *
+ * @return void
+ */
+function fn_symbols_checker_put_result_in_file($selected_values, $options, $statistic)
+{
+ $tables_count = $statistic['tables_count'];
+ $total_count = $statistic['total_count'];
+ $values_in_column_count = $statistic['values_in_column_count'];
+
+ $handle = fopen($options['db_name'] . '_info.txt', 'w+');
+
+ $file_string = PHP_EOL;
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+
+ $file_string .= 'Statistic by russian symbols '. PHP_EOL;
+ $file_string .= 'DB name: ' . $options['db_name'] . PHP_EOL;
+ $file_string .= 'Tables count: ' . $tables_count . PHP_EOL;
+ $file_string .= 'Total count of russian values: ' . $total_count . PHP_EOL;
+
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+
+ $file_string .= 'Detailed tables info: ' . PHP_EOL;
+ $file_string .= PHP_EOL;
+ foreach ($values_in_column_count as $table_name => $columns) {
+ $file_string .= ' - ' . $table_name . ':' . PHP_EOL;
+ foreach ($columns as $column_name => $column_info) {
+ $file_string .= ' - ' . $column_name . ': ' . $column_info . PHP_EOL;
+ }
+ $file_string .= PHP_EOL;
+ }
+
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+
+ $file_string .= 'Detailed values info: ' . PHP_EOL;
+ $file_string .= PHP_EOL;
+
+ foreach ($selected_values as $table_name => $table_info) {
+ $file_string .= ' - ' . $table_name . ': '. PHP_EOL;
+ foreach ($table_info['columns'] as $column_name => $columns_for_query) {
+ $file_string .= ' - ' . $column_name . ': ' . PHP_EOL;
+ foreach ($columns_for_query as $value) {
+ $file_string .= ' - ' . $value['incorrect_value'] . ';'. PHP_EOL;
+ }
+ $file_string .= PHP_EOL;
+ }
+ $file_string .= PHP_EOL;
+ }
+
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= '-------------------------------------------------------------------------------------------------';
+ $file_string .= PHP_EOL;
+ $file_string .= PHP_EOL;
+
+ fwrite($handle, $file_string);
+ fclose($handle);
+}
+
+/**
+ * Show result in console
+ *
+ * @param array $selected_values Array of founded russian values
+ * @param array $options Options from console
+ * @param array $statistic Statistic info
+ *
+ * @return void
+ */
+function fn_symbols_checker_show_statistic_in_console($selected_values, $options, $statistic)
+{
+ $tables_count = $statistic['tables_count'];
+ $total_count = $statistic['total_count'];
+ $values_in_column_count = $statistic['values_in_column_count'];
+
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+ echo('Statistic by russian symbols: ' . PHP_EOL);
+ echo('DB name: ' . $options['db_name'] . PHP_EOL);
+ echo('Tables count: ' . $tables_count . PHP_EOL);
+ echo('Total count of russian values: ' . $total_count . PHP_EOL);
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+
+ echo('Detailed tables info: ' . PHP_EOL);
+ echo(PHP_EOL);
+ foreach ($values_in_column_count as $table_name => $columns) {
+ echo(' - ' . $table_name . ':' . PHP_EOL);
+ foreach ($columns as $column_name => $column_info) {
+ echo(' - ' . $column_name . ': ' . $column_info . PHP_EOL);
+ }
+ echo(PHP_EOL);
+ }
+
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+
+ echo('Detailed values info: ' . PHP_EOL);
+ echo(PHP_EOL);
+
+ foreach ($selected_values as $table_name => $columns) {
+ echo(' - ' . $table_name . ': ' . PHP_EOL);
+ foreach ($columns['columns'] as $column_name => $columns_for_query) {
+ echo(' - ' . $column_name . ': '. PHP_EOL);
+ foreach ($columns_for_query as $value) {
+ echo(' - ' . $value['incorrect_value'] . ': '. PHP_EOL);
+ }
+ echo(PHP_EOL);
+ }
+ echo(PHP_EOL);
+ }
+
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo('-------------------------------------------------------------------------------------------------');
+ echo(PHP_EOL);
+ echo(PHP_EOL);
+}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment