Skip to content

Instantly share code, notes, and snippets.

@natematias
Created February 19, 2015 03:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save natematias/8de9d48148144b478454 to your computer and use it in GitHub Desktop.
Save natematias/8de9d48148144b478454 to your computer and use it in GitHub Desktop.
Distribution of Dollars Spent at Chipotle per Order
import codecs
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from dateutil import *
import math
import statsmodels.formula.api as sm
from collections import Counter
import csv
# Source data: https://github.com/TheUpshot/chipotle
# Read every row of the tab-separated file; the first row is split off and
# used as the DataFrame's column labels.
with open("orders.tsv") as tsv:
    orders = list(csv.reader(tsv, dialect="excel-tab"))
columns = orders.pop(0)
chipotle_df = pd.DataFrame(orders)
chipotle_df.columns = columns
def price_to_float(price):
    """Convert a price string such as ``"$2.39"`` to a float.

    Every ``$`` character is removed before the conversion is attempted.

    Args:
        price: Price text, e.g. ``"$16.98 "``.

    Returns:
        The numeric value as a float, or ``None`` when the remaining text
        is not a valid number.
    """
    stripped = price.replace("$", "")
    try:
        return float(stripped)
    except ValueError:
        # Only a malformed number is an expected failure; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return None
# Parse each item's price string into a numeric ``dollars`` column
# (unparseable prices become missing values).
chipotle_df['dollars'] = chipotle_df['item_price'].map(price_to_float)
order_gb = chipotle_df.groupby("order_id")
order_df = order_gb.aggregate(np.sum)
# Dollars summed per order, computed once and reused for both overlays.
order_totals = order_gb['dollars'].sum()
# ``density=True`` is the current spelling of the old ``normed=True`` keyword,
# which recent Matplotlib releases no longer accept. Normalising the histogram
# puts it on the same scale as the kernel density estimate drawn over it.
order_totals.hist(bins=40, density=True, color=sns.xkcd_rgb["light blue"])
order_totals.plot(kind="kde", color=sns.xkcd_rgb["pale red"])
# Report the actual number of orders instead of a hard-coded count.
plt.title(
    "Histogram and k density of dollars spent per order at Chipotle n = {}".format(
        len(order_totals)
    )
)
plt.show()
import scipy.stats as stats
# QQ plot of per-order dollars against a normal distribution.
# ``pylab`` was never imported, so the original raised NameError here; the
# already-imported ``plt`` (matplotlib.pyplot) is used as the plot target.
stats.probplot(order_df['dollars'], dist="norm", plot=plt)
# Report the actual number of orders instead of a hard-coded count.
plt.title(
    "QQ Plot of dollars spent per order at Chipotle, n = {}".format(len(order_df))
)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment