Python utility script to load large CSV data into sqlite in chunks
# -----------------------------------------------------
# python utility script to load large CSV data into sqlite in chunks
# dependencies: Pandas & sqlalchemy
# csv file used for testing : https://www.kaggle.com/sirpunch/meetups-data-from-meetupcom#members.csv
# -----------------------------------------------------
# {Modification Log}
# -----------------------------------------------------
# Author: Senthilnathan
# Maintainer: {maintainer}
# Created At: 12/14/2019
# Last Modified: 12/14/2019
# Status: production
# -----------------------------------------------------
import pandas as pd
from sqlalchemy import create_engine
import datetime as dt
# variables (Start)
dbname='meetup'
tname='members'
encoding='ISO-8859-1'
csv_database = create_engine('sqlite:///{}.db'.format(dbname))
start = dt.datetime.now()
chunksize = 100000
i = 0  # chunk counter
j = 1  # starting index for the next chunk
# variables (End)
try:
    file = 'members.csv'  # Change the path to the downloaded CSV
    for df in pd.read_csv(file, encoding=encoding, chunksize=chunksize, iterator=True):
        # strip spaces from column names so they are valid sqlite identifiers
        df = df.rename(columns={c: c.replace(' ', '') for c in df.columns})
        df.index += j  # keep the index continuous across chunks
        i += 1  # chunk count
        print('{} seconds: completed {} rows'.format((dt.datetime.now() - start).seconds, i * chunksize))
        df.to_sql(tname, csv_database, if_exists='append')
        j = df.index[-1] + 1
    end = dt.datetime.now()
    print('Finished importing ~{} rows into sqlite in {} seconds'.format(i * chunksize, (end - start).seconds))
except Exception as e:
    print('Exception during import - [{}]'.format(str(e)))
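
As a quick sanity check after the import, the same sqlite file can be queried back with pandas. A minimal sketch, assuming the meetup.db file and members table created by the script above:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///meetup.db')  # same sqlite file the import script wrote to

# count the imported rows and peek at the first few records
row_count = pd.read_sql('SELECT COUNT(*) AS n FROM members', engine)
print('rows in members: {}'.format(row_count['n'].iloc[0]))

sample = pd.read_sql('SELECT * FROM members LIMIT 5', engine)
print(sample)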