Skip to content

Instantly share code, notes, and snippets.

@kelvan
Created September 28, 2018 13:49
Show Gist options
  • Save kelvan/9b7c550aaa725eefbb90026f86530da6 to your computer and use it in GitHub Desktop.
Save kelvan/9b7c550aaa725eefbb90026f86530da6 to your computer and use it in GitHub Desktop.
Check MySQL table for empty/null/"always the same" columns
import argparse
import MySQLdb
parser = argparse.ArgumentParser(description='Check table for empty/null/"always the same" columns')
parser.add_argument('--db', help='Databse name', required=True)
parser.add_argument('--table', help='Table name', required=True)
parser.add_argument('--user', help='DB user', required=True)
parser.add_argument('--password', help='DB password', required=True)
parser.add_argument('--host', help='DB host', default='localhost')
args = parser.parse_args()
db = MySQLdb.connect(host=args.host, user=args.user, passwd=args.password, db=args.db)
cur = db.cursor()
SQL_COLUMN_NAMES = "SELECT column_name FROM information_schema.columns WHERE table_name = '{args.table}' AND table_schema = '{args.db}';".format(args=args)
SQL_NON_NULL_COUNT = "SELECT COUNT(*) FROM `{args.table}` WHERE `{{0}}` is not null and CAST(`{{0}}` as CHAR) != '';".format(args=args)
SQL_DISTINCT_VALUE = "SELECT DISTINCT `{{0}}` FROM `{args.table}`;".format(args=args)
cur.execute(SQL_COLUMN_NAMES)
column_names = [row[0] for row in cur.fetchall()]
for column_name in column_names:
cur.execute(SQL_NON_NULL_COUNT.format(column_name))
if cur.fetchone()[0] == 0:
print('Column `{}` always empty'.format(column_name))
continue
cur.execute(SQL_DISTINCT_VALUE.format(column_name))
rows = cur.fetchall()
if len(rows) == 1:
print('Column `{}` always {}'.format(column_name, rows[0][0]))
db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment