Seeking a faster MySQL LOAD DATA INFILE
# MySQL 5.7.33
# 1 million rows, 66MB CSV file
# 2022-09-06
## standard way into an InnoDB table; slow
# LOAD DATA LOCAL INFILE 'products.csv' INTO TABLE import FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\n' IGNORE 1 LINES (col1, col2, col3, col4)
-- Query OK, 1000000 rows affected (1 min 10.47 sec)
## mysqld parameters to adjust
# innodb_buffer_pool_size=2G
# innodb_log_file_size=512M
# innodb_flush_log_at_trx_commit=2
# innodb_flush_method=O_DIRECT
# innodb_change_buffer_max_size=50
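## These are server options for the [mysqld] section of my.cnf; some (e.g. innodb_log_file_size, innodb_flush_method) only take effect after a restart.
## A suggested check, not part of the original run — confirm the live values:
# SHOW VARIABLES WHERE Variable_name IN ('innodb_buffer_pool_size', 'innodb_log_file_size', 'innodb_flush_log_at_trx_commit', 'innodb_flush_method', 'innodb_change_buffer_max_size');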
## disable key checks
SET unique_checks=0;
SET foreign_key_checks=0;
## create temporary staging table
## Loading into a MyISAM temporary table is far faster than loading directly into InnoDB (3.32 sec versus 1 min 10.47 sec here, roughly 20x).
## Using a MyISAM temporary table rather than a MEMORY-engine table avoids the need to adjust the max_heap_table_size parameter for large datasets.
CREATE TEMPORARY TABLE t_import ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 SELECT * FROM t_product LIMIT 0;
-- Query OK, 0 rows affected (0.02 sec)
## establish the primary key, which CREATE TABLE ... SELECT does not copy from the base table
ALTER TABLE t_import MODIFY COLUMN id INT UNSIGNED PRIMARY KEY AUTO_INCREMENT;
-- Query OK, 0 rows affected (0.00 sec)
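## optional check: confirm the primary key and AUTO_INCREMENT are now in place
# SHOW CREATE TABLE t_import;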
## load CSV data file, ignoring first header line
LOAD DATA LOCAL INFILE 'products.csv' INTO TABLE t_import FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES (col1, col2, col3, col4);
-- Query OK, 1000000 rows affected (3.32 sec)
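## LOAD DATA reports truncated or malformed rows as warnings; worth inspecting before proceeding:
# SHOW WARNINGS;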
## convert MyISAM table to InnoDB
ALTER TABLE t_import ENGINE=InnoDB;
-- Query OK, 1000000 rows affected (4.97 sec)
## add indexes to columns as required
# ALTER TABLE t_import ADD INDEX ...
## create a permanent holding table (t_import is a TEMPORARY table, so it cannot be renamed into place and disappears when the session ends)
CREATE TABLE t_product_new LIKE t_import;
-- Query OK, 0 rows affected (0.56 sec)
## transfer the staged data to the holding table
INSERT INTO t_product_new SELECT * FROM t_import;
-- Query OK, 1000000 rows affected (10.71 sec)
## swap the tables: retire the original, then promote the holding table
RENAME TABLE t_product TO t_product_old;
-- Query OK, 0 rows affected (0.06 sec)
RENAME TABLE t_product_new TO t_product;
-- Query OK, 0 rows affected (0.06 sec)
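## Alternative (not the run recorded above): a single RENAME swaps both tables atomically, leaving no instant in which t_product is absent.
# RENAME TABLE t_product TO t_product_old, t_product_new TO t_product;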
## verify table population
SELECT COUNT(*) FROM t_product;
SELECT * FROM t_product LIMIT 20;
## clean up: drop the staging table and the retired original
DROP TABLE t_import;
DROP TABLE t_product_old;
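## re-enable the checks disabled at the start (they are session-scoped, but resetting them explicitly is safer if the session is reused)
SET unique_checks=1;
SET foreign_key_checks=1;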