Skip to content

Instantly share code, notes, and snippets.

@yolabingo
Last active May 6, 2019 22:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yolabingo/ec91920f90f07ef0ed66193a6bf8807c to your computer and use it in GitHub Desktop.
Save yolabingo/ec91920f90f07ef0ed66193a6bf8807c to your computer and use it in GitHub Desktop.
create multiple MySQL dump files for a single (presumably large) table
#!/usr/bin/env python3
import sys
# create mysqldump commands to create multiple sql files for a single (large) table
# specify dump_file_count for desired number of files
db_host = 'HOST'
db_user = 'USER'
db_name = 'DBNAME'
db_pass = 'PASS'
db_table = 'TABLE'
table_id = 'id' # primary key column for this table
dump_file_count = 10 # desired number of dump files
# max_id = 0 means we query the DB and generate equal-size sql files
# specify max ID to avoid MySQL query, but sql files may vary in size
# it should be reasonably close to max PRIMARY KEY id but need not be exact
max_id = 0
try:
import MySQLdb
introspect_db = True
except ModuleNotFoundError:
introspect_db = False
mysqldump = f'mysqldump --opt --order-by-primary --compress -h {db_host} -u {db_user} -p{db_pass}'
if max_id:
# there may be gaps in IDs so sql files may not have equal numbers of INSERTs
chunk_size = int(max_id / dump_file_count)
chunks = list(range(0, max_id, chunk_size))
else:
if not introspect_db:
print("Unable to load python MySQLdb module")
print("Either specify max_id or install the module")
print("Possibly install with:")
print("pip install --user mysqlclient ")
sys.exit(1)
# else
db = MySQLdb.connect(host=db_host, db=db_name, user=db_user, passwd=db_pass).cursor()
db.execute(f'SELECT {table_id} FROM {db_table} ORDER BY {table_id}')
row_ids = [x[0] for x in db.fetchall()]
row_count = len(row_ids)
chunk_size = int(row_count / dump_file_count)
chunks = [0]
try:
# put equal numbers of INSERTs in each file
i = 1
while True:
chunks.append(row_ids[i * chunk_size])
i += 1
except IndexError:
chunks.append(row_ids[-1])
print('# %s rows inserted per file' % (chunk_size,))
for i in range(len(chunks)):
no_create = ''
if i:
no_create = '--skip-add-drop-table --no-create-info'
file_count = str(i).zfill(4)
start = chunks[i]
try:
end = chunks[i+1]
id_range = f'-w"{db_table}.{table_id}>={start} AND {db_table}.{table_id}<{end}"'
except IndexError:
# include new rows inserted since our db query
id_range = f'-w"{db_table}.{table_id}>={end}"'
print(f'{mysqldump} {no_create} {id_range} -r {db_table}.{file_count}.sql {db_name} {db_table}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment