Skip to content

Instantly share code, notes, and snippets.

@dataman-git
dataman-git / Bike_perf
Created February 29, 2020 23:08
Bike_perf
# Forecast-accuracy tables derived from the cross-validation frames.
# `performance_metrics` only needs to be imported once for both calls.
from fbprophet.diagnostics import performance_metrics

# Performance 0: metrics computed from bike_0_cv
bike_0_p = performance_metrics(bike_0_cv)
bike_0_p.head()

# Performance 2: metrics computed from bike_2_cv
bike_2_p = performance_metrics(bike_2_cv)
bike_2_p.head()
@dataman-git
dataman-git / data
Last active March 4, 2020 01:24
data
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
# Read the Stata file and discard incomplete rows in one step
# (two observations have missing data).
df = pd.read_stata('/AEJfigs.dta').dropna()
df.shape
@dataman-git
dataman-git / bin
Last active March 3, 2020 20:22
bin
# Bucket `agecell` into 10 quantile-based bins and inspect the bin sizes.
age_bins = pd.qcut(df['agecell'], q=10)
df['age_bin'] = age_bins
df['age_bin'].value_counts()

# 0/1 indicator: 1 where agecell is at or above the cutoff of 21, else 0.
at_or_over_cutoff = df['agecell'] >= 21
df['over21'] = at_or_over_cutoff.astype('int64')
df['over21'].value_counts()

# Open a new figure for the plot.
plt.figure(figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k', num=None)
@dataman-git
dataman-git / ols
Last active March 4, 2020 04:14
ols
# Regression-discontinuity fit: regress `all` on the `over21` indicator alone
# and print the fit summary.
# NOTE(review): the name `re` would shadow the stdlib `re` module if that is
# ever imported in this script; it is kept because other code may refer to it.
re = smf.ols(formula="all ~ over21 ", data=df).fit()
print(re.summary())

plt.figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
plt.scatter(df['agecell'], df['all'], color="blue")

# Number of observations with over21 == 0.
l = df.loc[df['over21'] == 0, 'over21'].count()
fitted = re.predict()

# Draw the fitted line as two separate segments so nothing is drawn across
# the cutoff. The first segment must cover rows 0..l-1 inclusive; the earlier
# slice `0:(l-1)` silently dropped the last over21 == 0 observation.
# `.iloc` makes the positional slicing explicit regardless of the index labels.
# Assumes the rows are ordered so that the first `l` rows are exactly the
# over21 == 0 observations -- TODO confirm against the data.
plt.plot(df['agecell'].iloc[:l], fitted[:l], '-', color="r")
plt.plot(df['agecell'].iloc[l:], fitted[l:], '-', color="r")
plt.title("Regression Discontinuity: Before and After the Cutoff", fontsize="14")
import pandas as pd
from pandas_profiling import ProfileReport
import statsmodels.formula.api as smf
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
# Load the data set and preview the first rows.
df = pd.read_csv('/njmin3.csv')
df.head()

# OLS of `fte` on `d_nj` plus the listed covariates.
# The formula is long, so it is written as adjacent string literals inside
# parentheses; a plain quoted string cannot span physical lines (the previous
# form was a SyntaxError).
model = smf.ols(
    formula=(
        "fte ~ d_nj + kfc + roys + wendys "
        "+ CO_OWNED + SOUTHJ + CENTRALJ + PA1"
    ),
    data=df,
).fit()
print(model.summary())
# Group means of `fte` for the difference-in-differences comparison.
# Rows are split by `nj` (== 1 is NJ, anything else is treated as PA) and by
# `d` (== 1 is the "after" period, anything else is "before").
is_nj = df['nj'] == 1
is_after = df['d'] == 1

# NJ Before and after
NJ_before = df.loc[is_nj & ~is_after, 'fte'].mean()
NJ_after = df.loc[is_nj & is_after, 'fte'].mean()

# PA Before and after
PA_before = df.loc[~is_nj & ~is_after, 'fte'].mean()
PA_after = df.loc[~is_nj & is_after, 'fte'].mean()

# NJ counterfactual (if no treatment): NJ's starting level shifted by the
# change PA experienced over the same period (parallel-trends assumption).
# The earlier expression added (NJ_before - PA_before), which is the
# pre-period gap between the groups, not the control group's change over time.
NJ_counterfactual = NJ_before + (PA_after - PA_before)
@dataman-git
dataman-git / NJ_PA_plot
Created March 7, 2020 18:20
NJ_PA_plot
# Before/after line plot for NJ, PA and the NJ counterfactual.
# The figure options are passed straight to plt.subplots: calling plt.figure()
# first and plt.subplots() afterwards created two figures, leaving the
# configured one empty and drawing on a second one with default settings.
fig, ax = plt.subplots(figsize=(4, 3), dpi=80, facecolor='w', edgecolor='k')

# x positions '0' and '1' stand for the before and after periods.
lineNJ, = ax.plot(['0', '1'], [NJ_before, NJ_after],
                  color='blue', label='NJ before and after')
linePA, = ax.plot(['0', '1'], [PA_before, PA_after],
                  color='red', label='PA before and after')
lineNJ0, = ax.plot(['0', '1'], [NJ_before, NJ_counterfactual],
                   color='blue', linestyle='dashed', label='NJ counterfactual')
ax.legend()
ax.set_ylim(15, 28)
ax.set_title("Difference-in-difference: Before and After", fontsize="14")
@dataman-git
dataman-git / grunfeld
Created March 16, 2020 21:34
grunfeld
from statsmodels.datasets import grunfeld
# Load the Grunfeld data as a DataFrame and key it by (firm, year),
# then show the first rows.
data = grunfeld.load_pandas().data.set_index(["firm", "year"])
print(data.head())