Skip to content

Instantly share code, notes, and snippets.

@Sonophoto
Last active December 20, 2017 06:25
Show Gist options
  • Save Sonophoto/8de17356a6e0f8ddfe365c5ab2d4bacf to your computer and use it in GitHub Desktop.
Save Sonophoto/8de17356a6e0f8ddfe365c5ab2d4bacf to your computer and use it in GitHub Desktop.
Example of using python to perform simple ETL on JSON data to a flat Sqlite3 table
[
{
"name": "Chicago, IL",
"id": 1,
"scores": {
"walkability": 1.7,
"job_growth": 2.32,
"green_space": 0.9,
"taxes": 0.6
}
},
{
"name": "Seattle, WA",
"id": 2,
"scores": {
"walkability": 1.3,
"job_growth": 3.1,
"green_space": 1.2,
"taxes": 0.8
}
},
{
"name": "San Fransisco, CA",
"id": 3,
"scores": {
"walkability": 1.4,
"job_growth": 1.1,
"green_space": 1.5,
"taxes": 0.3
}
},
{
"name": "Kansas City, KS",
"id": 4,
"scores": {
"walkability": 0.3,
"job_growth": 1.1,
"green_space": 1.0,
"taxes": 1.3
}
},
{
"name": "Miami, FL",
"id": 5,
"scores": {
"walkability": 0.3,
"job_growth": 1.43,
"green_space": 0.9,
"taxes": 1.2
}
},
{
"name": "New York, NY",
"id": 6,
"scores": {
"walkability": 1.5,
"job_growth": 1.8,
"green_space": 1.4,
"taxes": 0.7
}
},
{
"name": "Detroit, MI",
"id": 7,
"scores": {
"walkability": 0.3,
"job_growth": 0.5,
"green_space": 0.3,
"taxes": 2.0
}
},
{
"name": "Los Angeles, CA",
"id": 8,
"scores": {
"walkability": 0.2,
"job_growth": 1.0,
"green_space": 0.7,
"taxes": 0.8
}
}
]
#!/usr/bin/env python3
""" Extracts json formatted city data from the file 'file_name'
Transforms json data into a single table schema with sqlite3 type names
Loads the flattened data records into the sqlite3 table 'cities'
AUTHOR: Brig Young <brig@sonophotostudios.com>
COPYRIGHT: Copyright 2017 Brig Young, Sonophotostudios.com
LICENSE: BSD2c (Citation Required)
"""
import sqlite3 as sqlite
import json as json
file_name = "city.json"
db_name = file_name.replace("json", "sqlite")
def load_json_data (file_name):
""" Load json data from file_name into data_json and return it
"""
try:
data_json = json.load(open(file_name))
except Exception as err:
print(err)
exit()
return data_json
def ETL2_sqlite (data_json):
""" Extracts, Transforms and Loads city data into sqlite db
To modify this function:
1. Add or remove data member from flat_schema
2. Add or remove its type from flat_types
3. Add or remove its entry in the loader loop below
4. Modify the test in test_etl.py (NOT INCLUDED IN THIS GIST)
"""
# Connect to our db
conn = sqlite.connect(db_name)
cursor = conn.cursor()
# Create our table - if it exists, overwrite it.
flat_schema = ['id', 'name', 'taxes', 'green_space', 'job_growth', 'walkability']
flat_types = ['text', 'text', 'real', 'real', 'real', 'real']
create_query = "create table cities ("
for x in range(len(flat_schema)):
create_query = (create_query + str(flat_schema[x]) + " " + str(flat_types[x]))
if x < (len(flat_schema)-1):
create_query += ", "
else:
create_query += ")"
try:
cursor.execute(create_query)
except sqlite.OperationalError: #ASSUMING exception is the table exists...
cursor.execute("""drop table cities""")
cursor.execute(create_query)
# Load our data into the db
for x in range(len(data_json)):
cursor.execute('insert into cities values (?,?,?,?,?,?)',\
(data_json[x]['id'],\
data_json[x]['name'],\
data_json[x]['scores']['taxes'],\
data_json[x]['scores']['green_space'],\
data_json[x]['scores']['job_growth'],\
data_json[x]['scores']['walkability']))
# Commit our changes and shutdown sqlite
conn.commit()
conn.close()
if __name__ == "__main__":
data_json = load_json_data(file_name)
ETL2_sqlite(data_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment