Skip to content

Instantly share code, notes, and snippets.

@Tinram
Created September 8, 2022 17:48
Show Gist options
  • Save Tinram/39eeec43e68f1d33c82ee4afe8e30267 to your computer and use it in GitHub Desktop.
Save Tinram/39eeec43e68f1d33c82ee4afe8e30267 to your computer and use it in GitHub Desktop.
faster MySQL LOAD DATA
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bulk Loader
Bulk load into a new InnoDB table via a temporary MyISAM memory table.
~2x faster than LOAD DATA direct into InnoDB table.
From https://gist.github.com/Tinram/18bcbbaeaee648ccde1b8ddfeb1d6f63
Note: MEMORY table 2x faster, but requires max_heap_table_size pre-calculation; memory shunting/algos = poor performance.
Author: Martin Latter
Date: 07/09/2022
Version: 0.02
Py req: MySQLdb
pip3 install mysqlclient
"""
import contextlib
import sys
import time
import MySQLdb
import MySQLdb.cursors
#############################################################
# CONFIGURATION
#############################################################
DATAFILE = 'dump.csv'
TMP_TABLE_NAME = 'mem_import'
FINAL_TABLE_NAME = 'users'
CREDENTIALS_DB_LOCAL = dict(
user = 'root',
passwd = '',
host = 'localhost',
db = 'bulkload',
port = 3306,
cursorclass=MySQLdb.cursors.DictCursor
)
TEMP_TABLE = """
CREATE TEMPORARY TABLE IF NOT EXISTS `%s`(
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
`firstname` VARCHAR(20) NOT NULL,
`lastname` VARCHAR(20) NOT NULL,
`country` VARCHAR(20) NOT NULL DEFAULT '',
`country_code` CHAR(2) NOT NULL,
KEY `idx_fl` (`firstname`, `lastname`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;
""" % (TMP_TABLE_NAME)
"""
server settings to consider using:
log_bin = OFF
innodb_doublewrite = OFF
innodb_flush_method = O_DIRECT
innodb_change_buffer_max_size = 50
ALTER INSTANCE disable innodb redo_log; # 8.0.21+
"""
#############################################################
UK = """ SET SESSION unique_checks = OFF """
FK = """ SET SESSION foreign_key_checks = OFF """
FL = """ SET GLOBAL innodb_flush_log_at_trx_commit = 2 """
IF = """ SET GLOBAL local_infile = ON """
IFE = """ SET GLOBAL local_infile = OFF """ # security clean-up
IMPORT = """
LOAD DATA LOCAL INFILE '%s' INTO TABLE `%s` FIELDS TERMINATED BY ',' IGNORE 1 LINES (firstname, lastname, country, country_code)
""" % (DATAFILE, TMP_TABLE_NAME)
CONV = """
ALTER TABLE `%s` ENGINE=InnoDB
""" % (TMP_TABLE_NAME)
CT = """
CREATE TABLE `%s` LIKE `%s`
""" % (FINAL_TABLE_NAME, TMP_TABLE_NAME)
INS = """
INSERT INTO `%s` SELECT * FROM `%s`;
""" % (FINAL_TABLE_NAME, TMP_TABLE_NAME)
DEL = """
DROP TABLE `%s`
""" % (TMP_TABLE_NAME)
#################################################################################
def main():
""" Connect and run queries. """
try:
conn = MySQLdb.connect(**CREDENTIALS_DB_LOCAL)
except MySQLdb.Error as err:
print('Cannot connect to database - %d: %s' % (err.args[0], err.args[1]))
sys.exit(1)
with contextlib.closing(conn):
with conn.cursor() as cursor:
try:
cursor.execute(IF)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(UK)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(FK)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(IF)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(TEMP_TABLE)
except MySQLdb.Error as err:
print(err)
try:
start = time.time()
imp = cursor.execute(IMPORT)
if imp > 0:
print(str(imp) + ' rows loaded')
finish = time.time() - start
print('LOAD DATA: ' + str(round(finish, 3)) + 's')
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(CONV)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(CT)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(INS)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(DEL)
except MySQLdb.Error as err:
print(err)
try:
cursor.execute(IFE)
except MySQLdb.Error as err:
print(err)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment