Last active
July 6, 2022 06:39
-
-
Save ValentinMouret/dab8d854c93dd180042346b115479a14 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- By beginning a transaction, you ensure | |
-- that you won't break anything. | |
-- You can always rollback. | |
begin; | |
create temp table batch_user ( | |
-- Enforcing a primary key slows down the processing | |
-- but it helps you making sure you don't have duplicates | |
-- in your batch. | |
id text primary key, | |
-- By storing the full JSON, we can easily process | |
-- the Firestore document. | |
document jsonb not null | |
); | |
-- This will read a CSV on your local filesystem | |
-- and insert its records in the table you just created. | |
copy batch_user (id, document) | |
from '/Users/valentinmouret/Desktop/users.csv' | |
with (format 'csv', | |
header, | |
delimiter ','); | |
-- Optional | |
-- This creates a savepoint. | |
-- Whatever happens after (unless you commit) | |
-- you will be able to rollback to this savepoint using | |
-- `rollback to temporary_table_created`. | |
-- This can be very useful when experimenting, | |
-- looking around to make sure things are the way they should. | |
savepoint batch_inserted; | |
-- E.g., get a glimpse of the emails. | |
select document->>'email' | |
from batch_user | |
limit 10; | |
-- Here is the actual batch insert. | |
-- Since its a SQL query, you can leverage the full | |
-- power of SQL to prepare your insert. `group by`, | |
-- `join`, function calls… | |
insert into user | |
(id, email) | |
select batch_user.id, | |
-- Access the `email` attribute of our JSON. | |
batch_user.document->>'email' as email | |
from batch_user | |
-- Since there might be data in the table already, | |
-- you might want to leave out items that were migrated. | |
-- This can be done with a left join. | |
left join user | |
using (id) | |
where user.id is null | |
-- We have a `not null` constraint in our table, | |
-- so we need to filter out null values. | |
and batch_user.document->>'email' is not null; | |
savepoint done; | |
-- Look around some more. | |
select count(*) | |
from user; | |
select id, | |
from user | |
limit 10; | |
-- Once everything looks good. | |
commit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment