Last active
May 6, 2019 22:30
-
-
Save yolabingo/ec91920f90f07ef0ed66193a6bf8807c to your computer and use it in GitHub Desktop.
Create multiple MySQL dump files for a single (presumably large) table.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Emit mysqldump commands that split a single (presumably large) table
into several SQL dump files.

Set dump_file_count to the desired number of files.  With max_id == 0 the
script queries the database for every primary-key value and splits so each
file holds an equal number of INSERTs; with an explicit max_id no query is
made, but because IDs may have gaps the files can vary in size.
"""
import sys

db_host = 'HOST'
db_user = 'USER'
db_name = 'DBNAME'
db_pass = 'PASS'
db_table = 'TABLE'
table_id = 'id'       # primary key column for this table
dump_file_count = 10  # desired number of dump files
# max_id = 0 means we query the DB and generate equal-size sql files.
# Specify max ID to avoid the MySQL query (sql files may then vary in size);
# it should be reasonably close to the max PRIMARY KEY id but need not be exact.
max_id = 0

# MySQLdb (mysqlclient) is only needed when max_id == 0
try:
    import MySQLdb
    introspect_db = True
except ModuleNotFoundError:
    introspect_db = False


def chunk_plan_from_max_id(top_id, file_count):
    """Return (chunk_size, boundaries) splitting the ID space 0..top_id.

    There may be gaps in IDs, so files may not hold equal numbers of INSERTs.
    """
    # max(1, ...) guards against a zero range step when file_count > top_id
    # (the original raised ValueError in that case)
    chunk_size = max(1, top_id // file_count)
    return chunk_size, list(range(0, top_id, chunk_size))


def chunk_plan_from_row_ids(row_ids, file_count):
    """Return (chunk_size, boundaries) putting equal numbers of rows per file.

    boundaries starts at 0 and ends with the highest ID seen; row_ids must be
    sorted ascending.  chunk_size is clamped to >= 1 so a file_count larger
    than the row count cannot loop forever (the original spun on index 0).
    """
    chunk_size = max(1, len(row_ids) // file_count)
    boundaries = [0]
    boundaries.extend(
        row_ids[i] for i in range(chunk_size, len(row_ids), chunk_size))
    boundaries.append(row_ids[-1])
    return chunk_size, boundaries


def fetch_row_ids():
    """Query the DB for all primary-key values of db_table, in ascending order."""
    conn = MySQLdb.connect(host=db_host, db=db_name, user=db_user,
                           passwd=db_pass)
    try:
        cursor = conn.cursor()
        cursor.execute(f'SELECT {table_id} FROM {db_table} ORDER BY {table_id}')
        return [row[0] for row in cursor.fetchall()]
    finally:
        conn.close()  # the original never closed the connection


def main():
    # NOTE(review): -p{db_pass} exposes the password to other local users via
    # `ps`; consider ~/.my.cnf or mysql_config_editor instead.
    mysqldump = (f'mysqldump --opt --order-by-primary --compress '
                 f'-h {db_host} -u {db_user} -p{db_pass}')
    if max_id:
        chunk_size, chunks = chunk_plan_from_max_id(max_id, dump_file_count)
    else:
        if not introspect_db:
            print("Unable to load python MySQLdb module")
            print("Either specify max_id or install the module")
            print("Possibly install with:")
            print("pip install --user mysqlclient ")
            sys.exit(1)
        chunk_size, chunks = chunk_plan_from_row_ids(fetch_row_ids(),
                                                     dump_file_count)
    # in the max_id case this is an ID span, not an exact row count
    print('# %s rows inserted per file' % (chunk_size,))
    for i, start in enumerate(chunks):
        # every file after the first must not drop/re-create the table
        no_create = '--skip-add-drop-table --no-create-info' if i else ''
        file_count = str(i).zfill(4)
        if i + 1 < len(chunks):
            end = chunks[i + 1]
            id_range = f'-w"{db_table}.{table_id}>={start} AND {db_table}.{table_id}<{end}"'
        else:
            # last range is open-ended so rows inserted since our query are
            # included.  The original used the loop-leaked variable `end`
            # here -- a NameError when there is only a single chunk; `start`
            # is the value actually intended.
            id_range = f'-w"{db_table}.{table_id}>={start}"'
        print(f'{mysqldump} {no_create} {id_range} -r {db_table}.{file_count}.sql {db_name} {db_table}')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment