Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
import json
# Load the address export from S3. The `address` and `phone_numbers`
# columns are decoded with json.loads below.
df = pd.read_csv(
    's3://airetail/bronze/sfleads/addresses/Addresses.csv'
)

# Assign with df[...] rather than attribute access: attribute assignment
# only updates a column when that column already exists and its name does
# not collide with a DataFrame attribute; otherwise it silently sets a
# plain instance attribute instead.
# NOTE(review): every address is decoded unconditionally, so a missing
# value would make json.loads raise TypeError -- presumably this column
# is always populated; confirm against the data.
df['address'] = df['address'].apply(json.loads)

# phone_numbers may be missing; those rows are kept as None instead of
# being passed to json.loads.
df['phone_numbers'] = df['phone_numbers'].apply(
    lambda pn: json.loads(pn) if pd.notna(pn) else None
)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Instructions

Follow these instructions to run the code snippets, in order, to solve the problem.

  • Initialization Process
  • Run SQL Query
  • Execute Python Script
id first_name last_name email gender ip_address
1 Roderic Cristofolo rcristofolo0@opera.com Male 216.22.180.231
2 Dunstan Burchard dburchard1@purevolume.com Male 18.135.76.159
3 Jeane Gronaver jgronaver2@mit.edu Female 40.123.108.201
4 Rebe Tomczynski rtomczynski3@phpbb.com Female 143.142.118.112
5 Mart Flament mflament4@livejournal.com Male 133.92.4.202
6 Wini Thomlinson wthomlinson5@smugmug.com Female 54.248.170.235
7 Peria Bainbrigge pbainbrigge6@mozilla.com Female 68.67.104.142
8 Tillie Abbot tabbot7@google.it Female 21.114.196.35
9 Hilde Merkle hmerkle8@discovery.com Female 72.118.37.139
{"id":1,"first_name":"Weidar","last_name":"Smyth","email":"wsmyth0@loc.gov","gender":"Male","phone_numbers":["550-543-4729"],"addresses":[{"street_number":"344","street_name":"Hallows","city":"Ridgely","state":"Maryland","postal_code":"21684"}]}
{"id":2,"first_name":"Emmit","last_name":"Ogborn","email":"eogborn1@wisc.edu","gender":"Male","phone_numbers":["374-344-7772","427-353-9104","349-982-6073"],"addresses":[{"street_number":"4","street_name":"Ryan","city":"Waco","state":"Texas","postal_code":"76796"},{"street_number":"7","street_name":"Holmberg","city":"Milwaukee","state":"Wisconsin","postal_code":"53205"}]}
{"id":3,"first_name":"Micah","last_name":"Dadswell","email":"mdadswell2@edublogs.org","gender":"Male","phone_numbers":["846-266-0132","231-711-7352"],"addresses":[{"street_number":"3","street_name":"Buena Vista","city":"Fort Worth","state":"Texas","postal_code":"76162"},{"street_number":"9865","street_name":"Iowa","city":"New York City","state":"New York","postal_code":"10009"},{"street_number":"6329
import pandas as pd
# Reading order_items
order_items_path = "/Users/itversity/Research/data/retail_db/order_items/part-00000"
order_items_schema = [
"order_item_id",
"order_item_order_id",
"order_item_product_id",
"order_item_quantity",
orders_path = "/Users/itversity/Research/data/retail_db/orders/part-00000"
orders_schema = [
"order_id",
"order_date",
"order_customer_id",
"order_status"
]
orders = pd.read_csv(orders_path,
header=None,
orders_path = "/Users/itversity/Research/data/retail_db/orders/part-00000"
orders_schema = [
"order_id",
"order_date",
"order_customer_id",
"order_status"
]
orders = pd.read_csv(orders_path,
header=None,
names=orders_schema