Skip to content

Instantly share code, notes, and snippets.

@MaximusMcCann
Created July 24, 2019 22:22
Show Gist options
  • Save MaximusMcCann/6c961678c133bc19f731e662f86781ce to your computer and use it in GitHub Desktop.
Save MaximusMcCann/6c961678c133bc19f731e662f86781ce to your computer and use it in GitHub Desktop.
Bulk upsert into Postgres query generator.
#!/usr/bin/env python
import os
print("")
print("------------------------")
print(" Note, this query depends on there being a `thumbprint` column and it having a unique index on it.")
print(" The query assumes you'll have a {stage_table} (or tmp table) to initially load data and then the {target_table} to upsert into.")
print(" See below for general flow for bulk upserting to a table")
print("")
print(" DROP TABLE IF EXISTS {stage_table};")
print(" CREATE TABLE {stage_table} (LIKE {target_table} INCLUDING ALL); # Note can be TEMP TABLE, but realize TEMP TABLEs are auto-dropped at the end of the connection. ")
print(" ALTER TABLE {stage_table} DROP COLUMN id; # Avoiod inserting NULL on NON NULL field")
print(" CREATE UNIQUE INDEX IF NOT EXISTS thumbprint_unique_idx ON {stage_table} (thumbprint);")
print("")
print(" <Bulk insert all data step>")
print("")
print(" BEGIN;")
print(" INSERT INTO... Query that's generated by this script!!!")
print(" DROP TABLE IF EXISTS {stage_table};")
print(" END;")
print("------------------------")
print("")
table_name = raw_input("What is your Postgres table's name?")
print("Run this command in Postico/Postgres: SELECT column_name FROM information_schema.columns WHERE table_name = '" + table_name + "';")
print("Copy the whole table from postico. Should just be one column. Paste contents here. Hit return twice after pasting.")
columns = []
line = " "
while len(line) > 0:
line = raw_input("")
print(line)
if line.strip() != '' and line != 'id' and line != 'updated_at' and line != 'created_at':
columns.append(line)
comma_columns = ", ".join(columns)
update_set_parts = []
for col in columns:
update_set_parts.append(col + " = EXCLUDED." + col)
set_part = ", ".join(update_set_parts)
update_where_parts = []
for col in columns:
update_where_parts.append("_target_table." + col + " != EXCLUDED." + col)
where_part = " OR ".join(update_where_parts)
upsert = """
INSERT INTO {{target_table}} AS _target_table ({comma_columns}) (SELECT {comma_columns} FROM {{stage_table}}) ON CONFLICT (thumbprint) DO UPDATE SET {set_part} WHERE {where_part};
""".format(comma_columns = comma_columns, set_part = set_part, where_part = where_part)
os.system("echo '%s' | pbcopy" % upsert)
print("Use the below command in order to UPSERT. Was also just copied to your clipboard!")
print("-------")
print(upsert)
print("-------")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment