Skip to content

Instantly share code, notes, and snippets.

@woodongwong
Last active April 19, 2023 04:55
Show Gist options
  • Save woodongwong/4bcf9a792c15e0ffbd5a4e6271b0a269 to your computer and use it in GitHub Desktop.
Save woodongwong/4bcf9a792c15e0ffbd5a4e6271b0a269 to your computer and use it in GitHub Desktop.
Check for data overflow in MySQL string type fields.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
import argparse
import getpass
def parse_command_line_args():
parser = argparse.ArgumentParser()
parser.add_argument('-u', '--user', default='root', help='MySQL user (default: root)')
parser.add_argument('-P', '--port', type=int, default=3306, help='MySQL port (default: 3306)')
parser.add_argument('-H', '--host', default='localhost', help='MySQL host (default: localhost)')
parser.add_argument('-d', '--database', required=True, help='MySQL database (required)')
parser.add_argument('-t', '--table', help='MySQL table (optional)')
parser.add_argument('-p', '--password', action='store_true', help='Prompt for MySQL password (optional)')
args = parser.parse_args()
# prompt for password if -p is included
if args.password:
args.password = getpass.getpass()
return args
def main():
args = parse_command_line_args()
# Connect to database
connection = pymysql.connect(host=args.host,
user=args.user,
password=args.password or "",
database=args.database,
port=args.port,
cursorclass=pymysql.cursors.DictCursor)
with connection:
with connection.cursor() as cursor:
sql = """SELECT `COLUMN_NAME`, `CHARACTER_OCTET_LENGTH`, `TABLE_NAME`
FROM `INFORMATION_SCHEMA`.`COLUMNS`
WHERE `TABLE_SCHEMA` = '{}' {} AND `DATA_TYPE` IN
('varbinary', 'varchar', 'blob', 'text', 'mediumblob', 'mediumtext', 'longblob', 'longtext')""".format(
args.database, args.table is not None and " AND `TABLE_NAME` = '{}' ".format(args.table) or "")
cursor.execute(sql)
result = cursor.fetchall()
# Group by TABLE_NAME
table_dict = {}
for item in result:
if item['TABLE_NAME'] not in table_dict:
table_dict[item['TABLE_NAME']] = []
table_dict[item['TABLE_NAME']].append(item)
for table_name, table_info in table_dict.items():
# Concatenate SQL and check the maximum length of fields in the table.
# SELECT MAX(LENGTH(`field1`)) AS `field1`, MAX(LENGTH(`field2`)) AS `field2` FROM table;
sql = "SELECT "
for item in table_info:
sql += "MAX(LENGTH(`{}`)) AS `{}`, ".format(item['COLUMN_NAME'], item['COLUMN_NAME'])
sql = sql[:-2] + " FROM {}".format(table_name)
cursor.execute(sql)
result = cursor.fetchone()
for item in table_info:
if result[item['COLUMN_NAME']] is not None and result[
item['COLUMN_NAME']] == item['CHARACTER_OCTET_LENGTH']:
# Count the number of data overflowed.
sql = "SELECT COUNT(*) AS `total` FROM (SELECT LENGTH(`{}`) AS `length` FROM `{}` HAVING `length` = '{}') `tmp`".format(
item['COLUMN_NAME'], table_name, item['CHARACTER_OCTET_LENGTH'])
cursor.execute(sql)
total = cursor.fetchone()['total']
if total > 0:
print(
"The table \033[1;35;40m{}\033[0m field \033[1;36;40m{}\033[0m has overflowed its capacity, with a total of \033[1;31;40m{}\033[0m data entries."
.format(table_name, item['COLUMN_NAME'], total))
if __name__ == "__main__":
main()
@woodongwong
Copy link
Author

woodongwong commented Apr 19, 2023

To use this script, you need to install the required dependencies. You can do this by running the command pip install pymysql in your terminal.

Once you have installed the required dependencies, you can run the script by executing the command python3 check_mysql_data_overflow.py in your terminal. The script takes several command line arguments:

-u, --user: the MySQL user to connect as (default is "root").
-P, --port: the port number to use for the MySQL connection (default is 3306).
-H, --host: the hostname or IP address of the MySQL server (default is "localhost").
-d, --database: the name of the MySQL database to connect to (required).
-t, --table: the name of a specific table to check (optional).
-p, --password: if included, the script will prompt you to enter a password for the MySQL connection.

You can specify any combination of these arguments when running the script. For example, to check a specific table in a MySQL database on a remote server with a non-default port and username, you could run the command:

python3 check_mysql_data_overflow.py -u myuser -H myserver.example.com -P 3306 -d mydatabase -t mytable -p

Note that if you include the -p argument, the script will prompt you to enter a password for the MySQL connection, even if you have already specified one using the --password argument.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment