Forked from bembengarifin/upsert_table.sql
upsert_table.sql: MySQL bulk insert with duplicate key update (upsert) and conditional data update
/*
references:
- https://dev.mysql.com/doc/refman/5.7/en/insert-on-duplicate.html
- https://stackoverflow.com/questions/32777081/bulk-insert-and-update-in-mysql
- https://thewebfellas.com/blog/conditional-duplicate-key-updates-with-mysql
*/
/* create a new database and use it */
drop database if exists test_upsert;
create database test_upsert;
use test_upsert;
/* create the upsert table for testing */
drop table if exists upsert_table;
create table upsert_table
(
  id int(11) unsigned not null primary key auto_increment,
  unkey1 varchar(10) not null,
  unkey2 varchar(10) not null,
  val1 varchar(100) not null,
  val2 varchar(100) not null,
  lastmodified int(11) unsigned,
  unique(unkey1, unkey2)
) engine=innodb default charset=utf8mb4;
/* perform the initial vanilla bulk data load */
insert into upsert_table
(unkey1, unkey2, val1, val2, lastmodified)
values
('A', '11', 'Meeting A', 'room 1-1', UNIX_TIMESTAMP('2017-08-25 20:00:01')),
('B', '22', 'Meeting B', 'room 2-2', UNIX_TIMESTAMP('2017-08-25 20:00:02')),
('C', '33', 'Meeting C', 'room 3-3', UNIX_TIMESTAMP('2017-08-25 20:00:03')),
('D', '44', 'Meeting D', 'room 4-4', UNIX_TIMESTAMP('2017-08-25 20:00:04')),
('E', '55', 'Meeting E', 'room 5-5', UNIX_TIMESTAMP('2017-08-25 20:00:05'));
/* view the inserted data */
select * from upsert_table;
/*
+----+--------+--------+-----------+----------+--------------+
| id | unkey1 | unkey2 | val1      | val2     | lastmodified |
+----+--------+--------+-----------+----------+--------------+
|  1 | A      | 11     | Meeting A | room 1-1 |   1503662401 |
|  2 | B      | 22     | Meeting B | room 2-2 |   1503662402 |
|  3 | C      | 33     | Meeting C | room 3-3 |   1503662403 |
|  4 | D      | 44     | Meeting D | room 4-4 |   1503662404 |
|  5 | E      | 55     | Meeting E | room 5-5 |   1503662405 |
+----+--------+--------+-----------+----------+--------------+
*/
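/*
note: the unique(unkey1, unkey2) index is what routes the next statement onto the
duplicate-key path. a plain insert that reuses an existing key pair would instead fail
with a duplicate-entry error (1062). a minimal sketch, kept inside this comment so the
script still runs end to end (the key name reported in the error depends on the
auto-generated index name):

  insert into upsert_table (unkey1, unkey2, val1, val2, lastmodified)
  values ('A', '11', 'Meeting A again', 'room 1-0', UNIX_TIMESTAMP('2017-08-25 20:00:10'));
  -- ERROR 1062 (23000): Duplicate entry 'A-11' for key '...'
*/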
/* the actual upsert + "conditional" data update based on timestamp */
insert into upsert_table
(unkey1, unkey2, val1, val2, lastmodified)
values
('A', '11', 'Updated Meeting A', 'room 1-10', UNIX_TIMESTAMP('2017-08-25 20:00:11')),
('B', '22', 'Should not update Meeting B', 'room 2-20', UNIX_TIMESTAMP('2017-08-25 19:00:02')),
('F', '66', 'New Meeting F', 'room 6-6', UNIX_TIMESTAMP('2017-08-25 20:00:06'))
on duplicate key update
val1 = if (lastmodified < values(lastmodified), values(val1), val1),
val2 = if (lastmodified < values(lastmodified), values(val2), val2),
lastmodified = if (lastmodified < values(lastmodified), values(lastmodified), lastmodified);
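/*
note: values() inside on duplicate key update is the MySQL 5.7 form used above; MySQL
8.0.20 deprecates it in favor of a row alias. a minimal sketch of the same idea using an
alias (arbitrarily named "new" here), kept inside this comment so the script stays valid
on 5.7:

  insert into upsert_table
  (unkey1, unkey2, val1, val2, lastmodified)
  values
  ('A', '11', 'Updated Meeting A', 'room 1-10', UNIX_TIMESTAMP('2017-08-25 20:00:11'))
  as new
  on duplicate key update
  val1 = if (lastmodified < new.lastmodified, new.val1, val1),
  val2 = if (lastmodified < new.lastmodified, new.val2, val2),
  lastmodified = if (lastmodified < new.lastmodified, new.lastmodified, lastmodified);
*/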
/*
verify that:
- row "A"-"11" is updated: a row with the same unique key exists and the new timestamp is greater than the existing one
- row "B"-"22" is left unchanged: a row with the same unique key exists but the new timestamp is lower than the existing one (the update fires, but re-assigns the existing values)
- row "F"-"66" is inserted: no row with that unique key exists yet
*/
select * from upsert_table;
/*
+----+--------+--------+-------------------+-----------+--------------+
| id | unkey1 | unkey2 | val1              | val2      | lastmodified |
+----+--------+--------+-------------------+-----------+--------------+
|  1 | A      | 11     | Updated Meeting A | room 1-10 |   1503662411 |
|  2 | B      | 22     | Meeting B         | room 2-2  |   1503662402 |
|  3 | C      | 33     | Meeting C         | room 3-3  |   1503662403 |
|  4 | D      | 44     | Meeting D         | room 4-4  |   1503662404 |
|  5 | E      | 55     | Meeting E         | room 5-5  |   1503662405 |
|  6 | F      | 66     | New Meeting F     | room 6-6  |   1503662406 |
+----+--------+--------+-------------------+-----------+--------------+
*/
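/*
another way to verify the same outcome (a sketch, assuming the default client flags, i.e.
CLIENT_FOUND_ROWS not set): for insert ... on duplicate key update, the affected-rows
count is 1 per newly inserted row, 2 per existing row actually changed, and 0 per existing
row re-assigned its current values. run immediately after the upsert statement above
(before any other statement), the count should be 3: 2 for "A", 0 for "B", 1 for "F".

  select row_count();
*/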