Skip to content

Instantly share code, notes, and snippets.

@arun057
Created November 13, 2019 03:51
Show Gist options
  • Save arun057/125d894ff741d1411e517101d3810f83 to your computer and use it in GitHub Desktop.
Save arun057/125d894ff741d1411e517101d3810f83 to your computer and use it in GitHub Desktop.
import json
# This would probably be very straightforward with either pandas
# or a simple SQLite database. This sample shows how I would implement
# it without those.
DATA_FILE = 'SWE sample data - Q2 data.csv'


def most_popular_products(lines):
    """Find the most popular product(s) in a stream of sales records.

    Each non-blank item of *lines* must be a JSON object with the keys
    ``user_id``, ``product_id`` and ``quantity``.  Aggregation happens
    in the same single pass that reads the records, so the input is
    never looped over a second time and may be any iterable of strings
    (for example an open file).

    Args:
        lines: Iterable of JSON-encoded sales records, one per item.

    Returns:
        A ``(by_purchasers, by_quantity)`` tuple of lists of product ids
        (as strings): the product(s) bought by the most distinct users,
        and the product(s) with the highest total quantity sold.  Ties
        are listed in the order in which each product reached the
        maximum; both lists are empty when there are no records.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
        ValueError: If ``quantity`` cannot be converted to ``int``.
    """
    by_users = {}   # product id -> set of user ids that bought it
    by_counts = {}  # product id -> total quantity sold
    highest_unique_sales = 0
    highest_unique_sales_pids = []
    highest_sales_qty = 0
    highest_sales_qty_pids = []

    for line in lines:
        # Blank lines (e.g. a trailing newline) are not records.
        if not line.strip():
            continue

        data = json.loads(line)
        uid = str(data["user_id"])
        pid = str(data["product_id"])
        qty = int(data["quantity"])

        buyers = by_users.setdefault(pid, set())
        buyers.add(uid)
        by_counts[pid] = by_counts.get(pid, 0) + qty

        unique_sales = len(buyers)
        if unique_sales > highest_unique_sales:
            highest_unique_sales = unique_sales
            highest_unique_sales_pids = [pid]
        elif (unique_sales == highest_unique_sales
              and pid not in highest_unique_sales_pids):
            highest_unique_sales_pids.append(pid)

        # NOTE: a running maximum is only right while per-product totals
        # never decrease, i.e. quantities are non-negative -- confirm
        # against the real data.
        total_qty = by_counts[pid]
        if total_qty > highest_sales_qty:
            highest_sales_qty = total_qty
            highest_sales_qty_pids = [pid]
        elif (total_qty == highest_sales_qty
              and pid not in highest_sales_qty_pids):
            # The membership test stops a zero-quantity row for a product
            # that already holds the maximum from listing it twice.
            highest_sales_qty_pids.append(pid)

    return highest_unique_sales_pids, highest_sales_qty_pids


def main():
    """Read the sample data file and print the most popular products."""
    # ``with`` closes the file even if a record fails to parse.
    with open(DATA_FILE) as fh:
        by_purchasers, by_quantity = most_popular_products(fh)
    print("Most popular product(s) based on the number of purchasers: [ " + ",".join(by_purchasers) + " ]")
    print("Most popular product(s) based on the quantity of goods sold: [ " + ",".join(by_quantity) + " ]")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment