Skip to content

Instantly share code, notes, and snippets.

@dgadiraju
Last active November 1, 2019 17:12
Show Gist options
  • Save dgadiraju/a2e02cce9d2fd2436a9427f4e7b8addb to your computer and use it in GitHub Desktop.
Save dgadiraju/a2e02cce9d2fd2436a9427f4e7b8addb to your computer and use it in GitHub Desktop.
import pandas as pd
# Reading order_items
order_items_path = "/Users/itversity/Research/data/retail_db/order_items/part-00000"
order_items_schema = [
"order_item_id",
"order_item_order_id",
"order_item_product_id",
"order_item_quantity",
"order_item_subtotal",
"order_item_product_price"
]
order_items = pd.read_csv(
order_items_path,
header=None,
names=order_items_schema
)
# Project order_item_order_id and order_item_subtotal
order_items[["order_item_order_id", "order_item_subtotal"]]
# Filter for order_item_order_id 2
order_items[order_items.order_item_order_id == 2]
order_items[order_items["order_item_order_id"] == 2]
# Compute revenue using order_item_subtotal for each order_item_order_id
order_items.groupby("order_item_order_id")["order_item_subtotal"]. \
sum()
# Compute revenue using order_item_subtotal for each order_item_order_id with alias
order_items.groupby("order_item_order_id")["order_item_subtotal"]. \
agg(order_revenue="sum")
# Reading orders
orders_path = "/Users/itversity/Research/data/retail_db/orders/part-00000"
orders_schema = [
"order_id",
"order_date",
"order_customer_id",
"order_status"
]
orders = pd.read_csv(
orders_path,
header=None,
names=orders_schema
)
# Join orders and order_items
orders.set_index("order_id"). \
join(order_items.set_index("order_item_order_id"))
# Compute Daily Revenue using
# orders.order_date and order_items.order_item_order_subtotal
# considering only COMPLETE and CLOSED orders.
orders_filtered = orders[orders.order_status.isin(["COMPLETE", "CLOSED"])]
orders_join = orders_filtered.set_index("order_id"). \
join(order_items.set_index("order_item_order_id"))
daily_revenue = orders_join.groupby("order_date")["order_item_subtotal"].agg(revenue="sum").round(2)
daily_revenue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment