Created
August 24, 2015 19:13
-
-
Save willkg/5f7d17bc0cc3552205f4 to your computer and use it in GitHub Desktop.
given a mysql db dump, shows rough tabley bigness
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Takes a mysql db dump in the form of: | |
blah blah blah | |
INSERT INTO `table_name` VALUES (...) | |
INSERT INTO `table_name` VALUES (...) | |
It parses it and returns number of rows and total bytes of data per | |
table. | |
Usage:: | |
python dump_size.py somedump.sql | |
""" | |
import sys | |
def tableify(table): | |
num_cols = 0 | |
maxes = [] | |
for row in table: | |
num_cols = max(num_cols, len(row)) | |
if len(maxes) < len(row): | |
maxes.extend([0] * (len(row) - len(maxes))) | |
for i, cell in enumerate(row): | |
maxes[i] = max(maxes[i], len(str(cell))) | |
def fix_row(maxes, row): | |
return ' '.join([ | |
str(cell) + (' ' * (maxes[i] - len(str(cell)))) | |
for i, cell in enumerate(row) | |
]) | |
return '\n'.join( | |
[ | |
fix_row(maxes, row) | |
for row in table | |
] | |
) | |
def main(argv): | |
if not argv: | |
print 'dump_size.py [DBDUMP]' | |
return 1 | |
# tablename -> [list of row sizes] | |
row_data = {} | |
with open(argv[0], 'r') as fp: | |
for line in fp: | |
if not line.startswith('INSERT INTO'): | |
continue | |
table, data = line.split(' VALUES ', 1) | |
table_name = table[table.find('`')+1:table.rfind('`')] | |
data = data[1:-1] | |
row_data.setdefault(table_name, []).append(len(data)) | |
rows = [ | |
# name, # rows, size | |
(table_name, len(data), sum(data)) | |
for table_name, data in row_data.items() | |
] | |
rows.sort(key=lambda row: row[2], reverse=True) | |
rows.insert(0, ['name', 'rows', 'size of data']) | |
print tableify(rows) | |
return 0 | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv[1:])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment