@palfrey
Created June 13, 2023 16:32
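Judging from the code below, the script restores a folder of CSV dumps (plus a deps.json dependency map) into a database: it truncates tables in dependency order, then re-imports each CSV over a local HTTP file server. A hypothetical invocation — the script name and every argument value here are invented for illustration, not taken from the gist:

    python restore_dump.py 'postgres://user:pass@db-host:5432/mydb' my-workstation-hostname

The first argument is a database connection URL; the second is a hostname the database server can use to reach this machine on port 9000.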
import csv
import json
import sys
import threading
import urllib.parse
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path
import psycopg2
# The first argument is a database URL; parse it into psycopg2 connection parameters.
url = sys.argv[1]
res = urllib.parse.urlparse(url)
conn = psycopg2.connect(
    database=res.path[1:],
    port=res.port,
    user=res.username,
    host=res.hostname,
    password=res.password,
)
conn.autocommit = True
cur = conn.cursor()

# The second argument is a hostname the database server can use to reach this
# machine's HTTP file server (started below on port 9000).
fileserver = sys.argv[2]
# Serve the current working directory over HTTP on port 9000 in a background
# thread, so the database can fetch the dump files from this machine.
def run():
    server_address = ("", 9000)
    httpd = HTTPServer(server_address, SimpleHTTPRequestHandler)
    httpd.serve_forever()

server_thread = threading.Thread(target=run)
server_thread.daemon = True
server_thread.start()
# deps.json maps each table name to the list of tables it requires.
dump_folder = Path("dump")
deps = json.load(dump_folder.joinpath("deps.json").open())

# Truncate every table, but only once all of the tables it requires have
# already been truncated.
wiped = []
while len(wiped) < len(deps.keys()):
    for table, requires in deps.items():
        if table in wiped:
            continue
        for req in requires:
            if req not in wiped:
                break
        else:
            print(f"Wiping {table}")
            cur.execute(f"TRUNCATE {table} CASCADE;")
            wiped.append(table)
# Load each CSV dump into the table named after the file. The IMPORT INTO ...
# CSV DATA statement (CockroachDB syntax) makes the database fetch the file
# over HTTP from the file server started above; skip='1' skips the header row.
for dump_file in dump_folder.glob("*.csv"):
    table_name = dump_file.with_suffix("").name
    with dump_file.open() as f:
        # Read the header row locally to build the target column list.
        reader = csv.reader(f, delimiter=";")
        headers = next(reader)
        header = ",".join([f'"{h}"' for h in headers])
        sql = (
            f"IMPORT INTO {table_name} ({header}) "
            f"CSV DATA ('http://{fileserver}:9000/dump/{dump_file.name}') "
            "WITH delimiter = ';', skip='1';"
        )
        print(sql)
        cur.execute(sql)
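For reference, dump/deps.json is expected to map each table name to the list of tables it requires, matching how the loop above consumes it. A minimal hypothetical example — the table names are invented purely for illustration:

    {
        "users": [],
        "orders": ["users"],
        "order_items": ["orders"]
    }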