Skip to content

Instantly share code, notes, and snippets.

@willkg
Created August 24, 2015 19:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willkg/5f7d17bc0cc3552205f4 to your computer and use it in GitHub Desktop.
Save willkg/5f7d17bc0cc3552205f4 to your computer and use it in GitHub Desktop.
given a mysql db dump, shows rough tabley bigness
#!/usr/bin/env python
"""
Takes a mysql db dump in the form of:
blah blah blah
INSERT INTO `table_name` VALUES (...)
INSERT INTO `table_name` VALUES (...)
It parses it and returns number of rows and total bytes of data per
table.
Usage::
python dump_size.py somedump.sql
"""
import sys
def tableify(table):
num_cols = 0
maxes = []
for row in table:
num_cols = max(num_cols, len(row))
if len(maxes) < len(row):
maxes.extend([0] * (len(row) - len(maxes)))
for i, cell in enumerate(row):
maxes[i] = max(maxes[i], len(str(cell)))
def fix_row(maxes, row):
return ' '.join([
str(cell) + (' ' * (maxes[i] - len(str(cell))))
for i, cell in enumerate(row)
])
return '\n'.join(
[
fix_row(maxes, row)
for row in table
]
)
def main(argv):
if not argv:
print 'dump_size.py [DBDUMP]'
return 1
# tablename -> [list of row sizes]
row_data = {}
with open(argv[0], 'r') as fp:
for line in fp:
if not line.startswith('INSERT INTO'):
continue
table, data = line.split(' VALUES ', 1)
table_name = table[table.find('`')+1:table.rfind('`')]
data = data[1:-1]
row_data.setdefault(table_name, []).append(len(data))
rows = [
# name, # rows, size
(table_name, len(data), sum(data))
for table_name, data in row_data.items()
]
rows.sort(key=lambda row: row[2], reverse=True)
rows.insert(0, ['name', 'rows', 'size of data'])
print tableify(rows)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment