Skip to content

Instantly share code, notes, and snippets.

@habedi
Created February 18, 2024 05:02
Show Gist options
  • Save habedi/07cc107fab3618bb6a9d667dd209c171 to your computer and use it in GitHub Desktop.
Save habedi/07cc107fab3618bb6a9d667dd209c171 to your computer and use it in GitHub Desktop.
A Python script to calatogue the tables in the eICU Collaborative Research Database (https://eicu-crd.mit.edu/) in DuckDB
import argparse
from pathlib import Path
import duckdb
# Create the parser
parser = argparse.ArgumentParser('`init_database.py` creates a DuckDB database '
'from the eICU dataset.')
# Add the arguments
parser.add_argument('eicu_dataset_path', type=str,
help='The path to the eICU dataset')
parser.add_argument('database_dir', type=str,
help='The directory where the database will be stored')
parser.add_argument('database_name', type=str,
help='The name of the database')
parser.add_argument('--recreate', action='store_true',
help='Recreate the database from scratch')
# Parse the arguments
args = parser.parse_args()
# Parameterize the database name and path
eicu_dataset_path = Path(args.eicu_dataset_path)
database_dir = Path(args.database_dir)
database_dir.mkdir(exist_ok=True, parents=True)
database_name = args.database_name
# Set to True to recreate the database from scratch
recreate_db = args.recreate
if recreate_db:
try:
(database_dir / database_name).unlink()
except FileNotFoundError:
pass
else:
print(f'Database {database_name} removed!')
eicu_tables = [str(t) for t in eicu_dataset_path.glob('*.csv.gz')]
# Generate the code to create the views
view_ddls = []
for table in eicu_tables:
table_name = table.split('/')[-1].split('.')[0].lower()
view_ddls.append(
f'CREATE VIEW IF NOT EXISTS {table_name}_view AS SELECT *'
f' FROM read_csv_auto(\'{table}\')')
for i, view in enumerate(view_ddls, start=1):
print(f'Creating view #{i}: {view}')
try:
with duckdb.connect(str(database_dir / database_name)) as conn:
conn.execute(view)
except Exception as e:
print(f'Error creating view: {e}')
else:
print(f'View created: {view}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment