Skip to content

Instantly share code, notes, and snippets.

@renegarcia
Created November 2, 2023 17:51
Show Gist options
  • Save renegarcia/9812aa7ddc6dbb9e0342a1aa2c92e106 to your computer and use it in GitHub Desktop.
Save renegarcia/9812aa7ddc6dbb9e0342a1aa2c92e106 to your computer and use it in GitHub Desktop.
Extract a zipped collection on csv files into a fresh sqlite3 database
"""
extract_zip_to_sqlite.py: Extract a zipped collection on csv files into a fresh sqlite3 database.
== Requirements ==
* A fairly recent version of Pandas.
* An schema of the tables definitions.
== Ussage ==
Update the value of `DATABASE`, `ZIPFILE` and `SCHEMA_LOCATION` as necessary.
"""
from pathlib import Path
from zipfile import ZipFile
from io import StringIO
import pandas as pd
import sqlite3
DATABASE = "database.sqlite3"
ZIPFILE = "zipfile_location.zip"
SCHEMA_LOCATION = "schema.sql"
if not Path(DATABASE).exists():
with open(SCHEMA_LOCATION) as f:
schema = f.read()
with sqlite3.connect(DATABASE) as con:
cur = con.cursor()
cur.executescript(schema)
con.close()
with ZipFile(ZIPFILE) as zipfile:
buffer = StringIO()
for csvfile in zipfile.namelist():
with zipfile.open(csvfile) as f:
tablename = Path(csvfile).stem
df = pd.read_csv(f)
with sqlite3.connect(DATABASE) as con:
cur = con.cursor()
cur.execute(f"DELETE FROM {tablename}")
con.commit()
df.to_sql(tablename, con, index=False, if_exists="append")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment