Last active
May 6, 2019 22:30
-
-
Save yolabingo/ec91920f90f07ef0ed66193a6bf8807c to your computer and use it in GitHub Desktop.
Create multiple MySQL dump files for a single (presumably large) table.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Emit mysqldump commands that split a single (presumably large) table
into several SQL dump files.

Set dump_file_count to the desired number of files.  With max_id == 0 the
script queries the database for every primary-key value and splits so each
file holds an equal number of INSERTs; with an explicit max_id no query is
made, but because IDs may have gaps the files can vary in size.
"""
import sys

db_host = 'HOST'
db_user = 'USER'
db_name = 'DBNAME'
db_pass = 'PASS'
db_table = 'TABLE'
table_id = 'id'       # primary key column for this table
dump_file_count = 10  # desired number of dump files
# max_id = 0 means we query the DB and generate equal-size sql files.
# Specify max ID to avoid the MySQL query (sql files may then vary in size);
# it should be reasonably close to the max PRIMARY KEY id but need not be exact.
max_id = 0

# MySQLdb (mysqlclient) is only needed when max_id == 0
try:
    import MySQLdb
    introspect_db = True
except ModuleNotFoundError:
    introspect_db = False


def chunk_plan_from_max_id(top_id, file_count):
    """Return (chunk_size, boundaries) splitting the ID space 0..top_id.

    There may be gaps in IDs, so files may not hold equal numbers of INSERTs.
    """
    # max(1, ...) guards against a zero range step when file_count > top_id
    # (the original raised ValueError in that case)
    chunk_size = max(1, top_id // file_count)
    return chunk_size, list(range(0, top_id, chunk_size))


def chunk_plan_from_row_ids(row_ids, file_count):
    """Return (chunk_size, boundaries) putting equal numbers of rows per file.

    boundaries starts at 0 and ends with the highest ID seen; row_ids must be
    sorted ascending.  chunk_size is clamped to >= 1 so a file_count larger
    than the row count cannot loop forever (the original spun on index 0).
    """
    chunk_size = max(1, len(row_ids) // file_count)
    boundaries = [0]
    boundaries.extend(
        row_ids[i] for i in range(chunk_size, len(row_ids), chunk_size))
    boundaries.append(row_ids[-1])
    return chunk_size, boundaries


def fetch_row_ids():
    """Query the DB for all primary-key values of db_table, in ascending order."""
    conn = MySQLdb.connect(host=db_host, db=db_name, user=db_user,
                           passwd=db_pass)
    try:
        cursor = conn.cursor()
        cursor.execute(f'SELECT {table_id} FROM {db_table} ORDER BY {table_id}')
        return [row[0] for row in cursor.fetchall()]
    finally:
        conn.close()  # the original never closed the connection


def main():
    # NOTE(review): -p{db_pass} exposes the password to other local users via
    # `ps`; consider ~/.my.cnf or mysql_config_editor instead.
    mysqldump = (f'mysqldump --opt --order-by-primary --compress '
                 f'-h {db_host} -u {db_user} -p{db_pass}')
    if max_id:
        chunk_size, chunks = chunk_plan_from_max_id(max_id, dump_file_count)
    else:
        if not introspect_db:
            print("Unable to load python MySQLdb module")
            print("Either specify max_id or install the module")
            print("Possibly install with:")
            print("pip install --user mysqlclient ")
            sys.exit(1)
        chunk_size, chunks = chunk_plan_from_row_ids(fetch_row_ids(),
                                                     dump_file_count)
    # in the max_id case this is an ID span, not an exact row count
    print('# %s rows inserted per file' % (chunk_size,))
    for i, start in enumerate(chunks):
        # every file after the first must not drop/re-create the table
        no_create = '--skip-add-drop-table --no-create-info' if i else ''
        file_count = str(i).zfill(4)
        if i + 1 < len(chunks):
            end = chunks[i + 1]
            id_range = f'-w"{db_table}.{table_id}>={start} AND {db_table}.{table_id}<{end}"'
        else:
            # last range is open-ended so rows inserted since our query are
            # included.  The original used the loop-leaked variable `end`
            # here -- a NameError when there is only a single chunk; `start`
            # is the value actually intended.
            id_range = f'-w"{db_table}.{table_id}>={start}"'
        print(f'{mysqldump} {no_create} {id_range} -r {db_table}.{file_count}.sql {db_name} {db_table}')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment