Last active
August 12, 2022 07:43
-
-
Save goodwoor/9bf28170f20cab188d80591a50f87644 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/var/tools/russian_symbols_checker/readme.md b/var/tools/russian_symbols_checker/readme.md | |
new file mode 100644 | |
index 000000000..88244bc6a | |
--- /dev/null | |
+++ b/var/tools/russian_symbols_checker/readme.md | |
@@ -0,0 +1,42 @@ | |
+# Russian symbols checker | |
+ | |
+## Назначение | |
+ | |
+Поиск русских символов в международном контенте. | |
+ | |
+## Подготовка к запуску | |
+ | |
+Для запуска скрипта необходимо локально развернуть дампы демок. Сам скрипт (файл run.php) разместить в корне архива демки. | |
+ | |
+> Следующие действия необходимо сделать отдельно для каждой демки: | |
+> - скачать архив демки | |
+> - создать БД: `CREATE DATABASE [db_name]` | |
+> - в скачанном архиве найти дамп базы (обычно называется dump.sql), развернуть его в созданной БД | |
+> - в докере можно сделать с помощью команды внутри контейнера: | |
+> ``` | |
+> mysql -h[host] -u[user] -p[password] [db_name] < path/to/dump_file/dump.sql | |
+> ``` | |
+> Подробнее - пункт 1.4 из статьи - https://t.cs-cart.com/youtrack/articles/1-A-229/Lokalnaya-ustanovka-helpdesk-s-pomoshchyu-docker | |
+ | |
+Скрипт запускается последовательно для каждой из демок, в имени исходной БД указывается имя развёрнутой базы демки. | |
+Если работаете с докером, то запускать скрипт нужно из контейнера и архив с демкой должен лежать внутри докер-окружения | |
+ | |
+При необходимости детализации данных лучше использовать дебаг. | |
+Массив $selected_values содержит в себе всю информацию о некорректных значениях. | |
+ | |
+## Запуск из консоли | |
+ | |
+``` | |
+php /path/to/helpdesk/var/tools/russian_symbols_checker/run.php | |
+ --host=... - хост исходной БД | |
+ --user=... - пользователь исходной БД | |
+ --password=... - пароль пользователя исходной БД | |
+ --db_name=... - имя исходной БД | |
+ --output=file - вывод в файл [db_name]_info.txt, необязательная опция, если не указать - вывод в консоль | |
+``` | |
+ | |
+Пример: | |
+``` | |
+php run.php --host=mysql5.7. --user=root --password=root --db_name=apparel_db --output=file | |
+``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/var/tools/russian_symbols_checker/run.php b/var/tools/russian_symbols_checker/run.php | |
new file mode 100644 | |
index 000000000..2cb1406cd | |
--- /dev/null | |
+++ b/var/tools/russian_symbols_checker/run.php | |
@@ -0,0 +1,376 @@ | |
+<?php | |
+ | |
+if (php_sapi_name() != 'cli') { | |
+ die('Access denied'); | |
+} | |
+ini_set('memory_limit', '1000M'); | |
+ | |
+$option_names = [ | |
+ 'host:', | |
+ 'user:', | |
+ 'password:', | |
+ 'db_name:', | |
+ 'output::' | |
+]; | |
+$options = getopt('', $option_names); | |
+ | |
+$db = new mysqli($options['host'], $options['user'], $options['password'], $options['db_name']); | |
+$db->set_charset('utf8mb4'); | |
+ | |
+if ($db->connect_errno) { | |
+ echo $db->connect_error . PHP_EOL; | |
+ exit(); | |
+} | |
+ | |
+$table_columns = fn_symbols_checker_get_tables_info($db, $options); | |
+$selected_values = fn_symbols_checker_search_russian_symbols($db, $table_columns); | |
+ | |
+$statistic = fn_symbols_checker_get_statistic_info($selected_values); | |
+if (isset($options['output']) && ($options['output'] === 'file')) { | |
+ fn_symbols_checker_put_result_in_file($selected_values, $options, $statistic); | |
+} else { | |
+ fn_symbols_checker_show_statistic_in_console($selected_values, $options, $statistic); | |
+} | |
+ | |
+/** | |
+ * Execute tables, columns and primary keys | |
+ * | |
+ * @param mysqli $db Db connection | |
+ * @param array $options Options from console | |
+ * | |
+ * @return array Array of columns, primary keys for each table in database | |
+ */ | |
+function fn_symbols_checker_get_tables_info($db, $options) | |
+{ | |
+ $tables_query = 'show tables'; | |
+ $tables = $db->query($tables_query); | |
+ | |
+ if (!($tables->num_rows > 0)) { | |
+ return []; | |
+ } | |
+ | |
+ $tables_list = $tables->fetch_all(); | |
+ | |
+ $table_columns = []; | |
+ foreach ($tables_list as $table_name) { | |
+ $table_name = array_shift($table_name); | |
+ $columns_info = $db->query('SHOW COLUMNS FROM ' . $table_name); | |
+ if (empty($columns_info)) { | |
+ continue; | |
+ } | |
+ | |
+ $columns_info = $columns_info->fetch_all(); | |
+ foreach ($columns_info as $column) { | |
+ $table_columns[$table_name]['columns'][] = array_shift($column); | |
+ } | |
+ | |
+ $primary_keys_info = $db->query( | |
+ "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{$options['db_name']}' AND TABLE_NAME = '{$table_name}' AND COLUMN_KEY = 'PRI';" | |
+ ); | |
+ | |
+ if (empty($primary_keys_info)) { | |
+ continue; | |
+ } | |
+ | |
+ $query_result = $primary_keys_info->fetch_all(); | |
+ $primary_keys = $query_result; | |
+ if ($primary_keys === null) { | |
+ $table_columns[$table_name]['primary_keys'] = null; | |
+ continue; | |
+ } | |
+ | |
+ foreach ($primary_keys as $p_key_array) { | |
+ $p_key = array_shift($p_key_array); | |
+ $table_columns[$table_name]['primary_keys'][] = $p_key; | |
+ } | |
+ } | |
+ | |
+ foreach ($table_columns as $table_name => $table_info) { | |
+ foreach ($table_info['columns'] as $column_name) { | |
+ if ($column_name === 'lang_code' && $table_name !== 'cscart_companies') { | |
+ // flag for tables with 'lang_code' field | |
+ $table_columns[$table_name]['is_lang_table'] = 'Y'; | |
+ // lang_code = 'ru', lang_code = 'uk' don't need to search | |
+ $table_columns[$table_name]['additional_condition'] = " AND lang_code != 'ru' AND lang_code != 'RU' AND lang_code != 'uk' AND lang_code != 'UK'"; | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ | |
+ return $table_columns; | |
+} | |
+ | |
+/** | |
+ * Prepare and execute search queries | |
+ * | |
+ * @param mysqli $db Db connection | |
+ * @param array $table_columns Columns, primary keys for each table in database | |
+ * | |
+ * @return array Array of founded russian values | |
+ */ | |
+function fn_symbols_checker_search_russian_symbols($db, $table_columns) | |
+{ | |
+ // russian symbols 'с' and 'С' was deleted | |
+ $russian_symbols = [ | |
+ 'А', 'а', 'Б', 'б', 'В', 'в', 'Г', 'г', 'Д', 'д', 'Е', 'е', 'Ё', 'ё', 'Ж', 'ж', 'З', 'з', | |
+ 'И', 'и', 'Й', 'й', 'К', 'к', 'Л', 'л', 'М', 'м', 'Н', 'н', 'О', 'о', 'П', 'п', 'Р', 'р', | |
+ 'Т', 'т', 'У', 'у', 'Ф', 'ф', 'Х', 'х', 'Ц', 'ц', 'Ч', 'ч', 'Ш', 'ш', 'Щ', 'щ', 'Ъ', 'ъ', | |
+ 'Ы', 'ы', 'Ь', 'ь', 'Э', 'э', 'Ю', 'ю', 'Я', 'я' | |
+ ]; | |
+ $selected_values = []; | |
+ | |
+ foreach ($table_columns as $table_name => $table_info) { | |
+ if (count($table_info['primary_keys']) > 1) { | |
+ $primary_keys = implode(', ', $table_info['primary_keys']); | |
+ } elseif (count($table_info['primary_keys']) === 1) { | |
+ $primary_keys = array_shift($table_info['primary_keys']); | |
+ } else { | |
+ $primary_keys = ''; | |
+ } | |
+ | |
+ foreach ($table_info['columns'] as $column_name) { | |
+ if (!empty($primary_keys)) { | |
+ $format_select_query = 'SELECT %s, %s FROM %s WHERE '; | |
+ $select_query = sprintf( | |
+ $format_select_query, | |
+ $primary_keys, | |
+ $column_name, | |
+ $table_name | |
+ ); | |
+ } else { | |
+ $format_select_query = 'SELECT %s FROM %s WHERE '; | |
+ $select_query = sprintf( | |
+ $format_select_query, | |
+ $column_name, | |
+ $table_name | |
+ ); | |
+ } | |
+ | |
+ foreach ($russian_symbols as $symbol) { | |
+ if ($symbol === 'А') { | |
+ // first condition without 'OR' and with '(' | |
+ $select_query = $select_query . "({$column_name} LIKE '%{$symbol}%'"; | |
+ continue; | |
+ } | |
+ if ($symbol === 'я') { | |
+ // last condition with ')' | |
+ $select_query = $select_query . " OR {$column_name} LIKE '%{$symbol}%')"; | |
+ continue; | |
+ } | |
+ $select_query = $select_query . " OR {$column_name} LIKE '%{$symbol}%'"; | |
+ } | |
+ | |
+ if (!empty($table_info['additional_condition'])) { | |
+ $select_query = $select_query . $table_info['additional_condition']; | |
+ } | |
+ | |
+ // very long queries | |
+ $query_result = $db->query($select_query); | |
+ if ($query_result->num_rows === 0 || !$query_result) { | |
+ continue; | |
+ } | |
+ | |
+ $selected_values[$table_name]['is_lang_table'] = ($table_info['is_lang_table'] === 'Y') ? 'Y' : 'N'; | |
+ | |
+ while ($row = $query_result->fetch_assoc()) { | |
+ $info = [ | |
+ 'incorrect_value' => array_pop($row), | |
+ 'primary_values' => $row, | |
+ ]; | |
+ | |
+ if ($table_info['is_lang_table'] === 'Y') { | |
+ $info['is_en_value'] = ($row['lang_code'] === 'EN' || $row['lang_code'] === 'en') ? 'Y' : 'N'; | |
+ } | |
+ | |
+ $selected_values[$table_name]['columns'][$column_name][] = $info; | |
+ } | |
+ } | |
+ } | |
+ return $selected_values; | |
+} | |
+ | |
+/** | |
+ * Execute statistic from result | |
+ * | |
+ * @param array $selected_values Array of founded russian values | |
+ * | |
+ * @return array Statistic info | |
+ */ | |
+function fn_symbols_checker_get_statistic_info($selected_values) | |
+{ | |
+ $values_in_column_count = []; | |
+ $tables_count = 0; | |
+ $total_count = 0; | |
+ | |
+ foreach ($selected_values as $table_name => $table_info) { | |
+ $tables_count += 1; | |
+ foreach ($table_info['columns'] as $column_name => $values) { | |
+ $values_in_column_count[$table_name][$column_name] += count($values); | |
+ $total_count += count($values); | |
+ } | |
+ } | |
+ | |
+ return [ | |
+ 'tables_count' => $tables_count, | |
+ 'total_count' => $total_count, | |
+ 'values_in_column_count' => $values_in_column_count | |
+ ]; | |
+} | |
+ | |
+/** | |
+ * Put result in txt file ([db_name]_info.txt) | |
+ * | |
+ * @param array $selected_values Array of founded russian values | |
+ * @param array $options Options from console | |
+ * @param array $statistic Statistic info | |
+ * | |
+ * @return void | |
+ */ | |
+function fn_symbols_checker_put_result_in_file($selected_values, $options, $statistic) | |
+{ | |
+ $tables_count = $statistic['tables_count']; | |
+ $total_count = $statistic['total_count']; | |
+ $values_in_column_count = $statistic['values_in_column_count']; | |
+ | |
+ $handle = fopen($options['db_name'] . '_info.txt', 'w+'); | |
+ | |
+ $file_string = PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ | |
+ $file_string .= 'Statistic by russian symbols '. PHP_EOL; | |
+ $file_string .= 'DB name: ' . $options['db_name'] . PHP_EOL; | |
+ $file_string .= 'Tables count: ' . $tables_count . PHP_EOL; | |
+ $file_string .= 'Total count of russian values: ' . $total_count . PHP_EOL; | |
+ | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ | |
+ $file_string .= 'Detailed tables info: ' . PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ foreach ($values_in_column_count as $table_name => $columns) { | |
+ $file_string .= ' - ' . $table_name . ':' . PHP_EOL; | |
+ foreach ($columns as $column_name => $column_info) { | |
+ $file_string .= ' - ' . $column_name . ': ' . $column_info . PHP_EOL; | |
+ } | |
+ $file_string .= PHP_EOL; | |
+ } | |
+ | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ | |
+ $file_string .= 'Detailed values info: ' . PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ | |
+ foreach ($selected_values as $table_name => $table_info) { | |
+ $file_string .= ' - ' . $table_name . ': '. PHP_EOL; | |
+ foreach ($table_info['columns'] as $column_name => $columns_for_query) { | |
+ $file_string .= ' - ' . $column_name . ': ' . PHP_EOL; | |
+ foreach ($columns_for_query as $value) { | |
+ $file_string .= ' - ' . $value['incorrect_value'] . ';'. PHP_EOL; | |
+ } | |
+ $file_string .= PHP_EOL; | |
+ } | |
+ $file_string .= PHP_EOL; | |
+ } | |
+ | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= '-------------------------------------------------------------------------------------------------'; | |
+ $file_string .= PHP_EOL; | |
+ $file_string .= PHP_EOL; | |
+ | |
+ fwrite($handle, $file_string); | |
+ fclose($handle); | |
+} | |
+ | |
+/** | |
+ * Show result in console | |
+ * | |
+ * @param array $selected_values Array of founded russian values | |
+ * @param array $options Options from console | |
+ * @param array $statistic Statistic info | |
+ * | |
+ * @return void | |
+ */ | |
+function fn_symbols_checker_show_statistic_in_console($selected_values, $options, $statistic) | |
+{ | |
+ $tables_count = $statistic['tables_count']; | |
+ $total_count = $statistic['total_count']; | |
+ $values_in_column_count = $statistic['values_in_column_count']; | |
+ | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo('Statistic by russian symbols: ' . PHP_EOL); | |
+ echo('DB name: ' . $options['db_name'] . PHP_EOL); | |
+ echo('Tables count: ' . $tables_count . PHP_EOL); | |
+ echo('Total count of russian values: ' . $total_count . PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ | |
+ echo('Detailed tables info: ' . PHP_EOL); | |
+ echo(PHP_EOL); | |
+ foreach ($values_in_column_count as $table_name => $columns) { | |
+ echo(' - ' . $table_name . ':' . PHP_EOL); | |
+ foreach ($columns as $column_name => $column_info) { | |
+ echo(' - ' . $column_name . ': ' . $column_info . PHP_EOL); | |
+ } | |
+ echo(PHP_EOL); | |
+ } | |
+ | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ | |
+ echo('Detailed values info: ' . PHP_EOL); | |
+ echo(PHP_EOL); | |
+ | |
+ foreach ($selected_values as $table_name => $columns) { | |
+ echo(' - ' . $table_name . ': ' . PHP_EOL); | |
+ foreach ($columns['columns'] as $column_name => $columns_for_query) { | |
+ echo(' - ' . $column_name . ': '. PHP_EOL); | |
+ foreach ($columns_for_query as $value) { | |
+ echo(' - ' . $value['incorrect_value'] . ': '. PHP_EOL); | |
+ } | |
+ echo(PHP_EOL); | |
+ } | |
+ echo(PHP_EOL); | |
+ } | |
+ | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo('-------------------------------------------------------------------------------------------------'); | |
+ echo(PHP_EOL); | |
+ echo(PHP_EOL); | |
+} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment