Skip to content

Instantly share code, notes, and snippets.

@acheong08
Last active September 21, 2022 20:58
Show Gist options
  • Save acheong08/c8373dbe816f1fc09de0c1e0e89d23b4 to your computer and use it in GitHub Desktop.
Save acheong08/c8373dbe816f1fc09de0c1e0e89d23b4 to your computer and use it in GitHub Desktop.
Convert excel to SQLite database
# This program takes an excel documents and maps it to a sqlite database.
# It is designed to be used with the excel-to-sqlite.xlsx template.
#
import sqlite3
import xlrd
import sys
import logging
import openpyxl
def useOPENPYXL(excelFile, sqliteFile):
# Read excel data using openpyxl
workbook = openpyxl.load_workbook(excelFile)
worksheet = workbook.active
# Get a list of column headings
column_headings = list(worksheet.rows)[0]
# Create a list of column names
column_names = []
for column_heading in column_headings:
column_names.append(column_heading.value.lower().replace(' ', '_'))
# Create sqlite table with fields as column names
sqlite_table_create_statement = 'CREATE TABLE IF NOT EXISTS ' + worksheet.title + ' ('
# Add id field as primary key and autoincrement
sqlite_table_create_statement += 'id INTEGER PRIMARY KEY AUTOINCREMENT, '
# Insert column names into sqlite table create statement
for column_name in column_names:
sqlite_table_create_statement += column_name + ' TEXT,'
sqlite_table_create_statement = sqlite_table_create_statement[:-1] + ')'
# Execute sqlite table create statement
logging.info('Creating table ' + worksheet.title + ' in ' + output_filename)
conn = sqlite3.connect(sqliteFile)
c = conn.cursor()
c.execute(sqlite_table_create_statement)
conn.commit()
# Insert data into sqlite table
logging.info('Inserting data into table ' + worksheet.title + ' in ' + output_filename)
for row in worksheet.rows:
sqlite_insert_statement = 'INSERT INTO ' + worksheet.title + ' ('
for column_name in column_names:
sqlite_insert_statement += column_name + ','
sqlite_insert_statement = sqlite_insert_statement[:-1] + ') VALUES ('
for cell in row:
sqlite_insert_statement += '"' + str(cell.value) + '",'
sqlite_insert_statement = sqlite_insert_statement[:-1] + ')'
c.execute(sqlite_insert_statement)
conn.commit()
# Close sqlite connection
conn.close()
logging.info('Done')
def useXLRD(excelFile, sqliteFile):
# Read excel data
workbook = xlrd.open_workbook(excelFile)
worksheet = workbook.sheet_by_index(0)
# Get a list of column headings
column_headings = worksheet.row_values(0)
# Create a list of column names
column_names = []
for column_heading in column_headings:
column_names.append(column_heading.lower().replace(' ', '_'))
# Create sqlite table with fields as column names
sqlite_table_create_statement = 'CREATE TABLE IF NOT EXISTS ' + worksheet.name + ' ('
# Add id field as primary key and autoincrement
sqlite_table_create_statement += 'id INTEGER PRIMARY KEY AUTOINCREMENT, '
# Insert column names into sqlite table create statement
for column_name in column_names:
sqlite_table_create_statement += column_name + ' TEXT,'
sqlite_table_create_statement = sqlite_table_create_statement[:-1] + ')'
# Execute sqlite table create statement
logging.info('Creating table ' + worksheet.name + ' in ' + output_filename)
conn = sqlite3.connect(sqliteFile)
c = conn.cursor()
c.execute(sqlite_table_create_statement)
conn.commit()
# Insert data into sqlite table
logging.info('Inserting data into table ' + worksheet.name + ' in ' + output_filename)
for row_number in range(1, worksheet.nrows):
row_data = worksheet.row_values(row_number)
sqlite_insert_statement = 'INSERT INTO ' + worksheet.name + ' ('
for column_name in column_names:
sqlite_insert_statement += column_name + ','
sqlite_insert_statement = sqlite_insert_statement[:-1] + ') VALUES ('
for row_value in row_data:
sqlite_insert_statement += '"' + str(row_value) + '",'
sqlite_insert_statement = sqlite_insert_statement[:-1] + ')'
c.execute(sqlite_insert_statement)
conn.commit()
# Close sqlite connection
conn.close()
logging.info('Done')
# Take two arguments, excel and output
if len(sys.argv) != 4:
print('''
Usage: python3 excel-to-sqlite.py <excel_file> <output_file> <module>
Modules: xlrd, openpyxl
''')
sys.exit(1)
excel_filename = sys.argv[1]
output_filename = sys.argv[2]
module = sys.argv[3]
# Set logging level
logging.basicConfig(level=logging.INFO)
if module == 'xlrd':
useXLRD(excel_filename, output_filename)
elif module == 'openpyxl':
useOPENPYXL(excel_filename, output_filename)
else:
print('Invalid module')
sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment