Skip to content

Instantly share code, notes, and snippets.

@dgadiraju
Created February 4, 2018 23:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgadiraju/5139c2b5d8d3dfdb09d92c82e3046929 to your computer and use it in GitHub Desktop.
Save dgadiraju/5139c2b5d8d3dfdb09d92c82e3046929 to your computer and use it in GitHub Desktop.
# map -- apply a function to every record of an RDD, yielding a new RDD.
# NOTE(review): `sc` is not defined in this script; presumably it is the
# SparkContext that the pyspark shell creates -- confirm before running
# this as a standalone program.
orders = sc.textFile("/public/retail_db/orders")
help(orders.map)

# Get status (field 3 of each comma-separated record)
orders.map(lambda o: o.split(",")[3]).first()

# Get order date (field 1, a "YYYY-MM-DD HH24:MI:SS" string)
orders.map(lambda o: o.split(",")[1]).first()

# Convert date format from YYYY-MM-DD HH24:MI:SS -> YYYYMMDD
# Type cast date to integer
def order_date_as_int(order):
    """Return the date held in field 1 of `order` as a YYYYMMDD integer.

    `order` is one comma-separated record. The time-of-day part (everything
    after the first space) is dropped and the dashes are removed before the
    cast, e.g. "2013-07-25 00:00:00" -> 20130725.
    """
    return int(order.split(",")[1].split(" ")[0].replace("-", ""))

# Build the mapped RDD once and reuse it for each of the calls below
# instead of repeating the same parsing expression three times.
orderDates = orders.map(order_date_as_int)
orderDates.first()
orderDates.take(10)
orderDates.count()

# Create tuples: (status, 1) pairs. Only the map is declared here; no
# first()/take()/count() is called on the result.
orders.map(lambda o: (o.split(",")[3], 1))
# Load the order_items data set and preview the raw text records.
orderItems = sc.textFile("/public/retail_db/order_items")
orderItems.first()
for i in orderItems.take(10):
    print(i)

def _to_int_float_pair(oi):
    """Split `oi` on commas and return (int(field 1), float(field 4))."""
    fields = oi.split(",")
    return (int(fields[1]), float(fields[4]))

# Pair RDD built from every order_items record.
orderItemsMap = orderItems.map(_to_int_float_pair)
orderItemsMap.first()
for i in orderItemsMap.take(10):
    print(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment