Skip to content

Instantly share code, notes, and snippets.

@chrisdmell
Created January 15, 2022 17:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrisdmell/e2d02f24aaf7849916ebba369c947814 to your computer and use it in GitHub Desktop.
Save chrisdmell/e2d02f24aaf7849916ebba369c947814 to your computer and use it in GitHub Desktop.
# Work on a private copy so the source frame is never modified.
f = final_cleaned_df.copy()

# Rows for the two buses being compared, restricted to a single service date.
on_service_date = f["Service_Date"] == "15-07-2020 00:00"
b1 = f[on_service_date & (f["Bus"] == "a6951a59b64579edcf822ab9ea4c0c83")]
b2 = f[on_service_date & (f["Bus"] == "ab479dab4a9e6bc3eaefe77a09f027ed")]

# Sorted union of every distinct "RecordedAt_new" value seen for either bus;
# it is the left-hand spine both buses are aligned to below.
recorded_dates_df = (
    pd.concat([b1[["RecordedAt_new"]], b2[["RecordedAt_new"]]], axis=0)
    .drop_duplicates()
    .sort_values(by="RecordedAt_new")
    .reset_index()
    .drop(columns="index")
)

# Left merges keep every row of the spine. The spine only carries the key
# column, so the first merge has no overlapping columns to suffix; in the
# second merge the columns shared by b1 and b2 become *_B1 / *_B2.
joined_1 = recorded_dates_df.merge(
    b1, on=["RecordedAt_new"], how="left", suffixes=("_actuals", "_B1")
)
joined_df = joined_1.merge(
    b2, on=["RecordedAt_new"], how="left", suffixes=("_B1", "_B2")
)
joined_df  # bare expression: only displays the frame when it ends a notebook cell

# Keep just the columns used downstream, then drop exact duplicate rows.
cols_to_keep = [
    "RecordedAt_new",
    "Service_Date_B1",
    "Bus_B1",
    "Bus_B2",
    "average_price_s1_s2_filled_B1",
    "average_price_s1_s2_filled_B2",
]
model_df = joined_df[cols_to_keep]
model_df_2 = model_df.drop_duplicates()
## Replace the nulls left in model_df_2.
# Take an explicit copy first: model_df_2 is derived from a column slice of
# joined_df, and assigning into it directly can trigger pandas'
# SettingWithCopyWarning.
model_df_2 = model_df_2.copy()

# Identifier columns are imputed with their most frequent observed value.
# The original filled "Bus_B1" twice and never touched "Bus_B2" (which then
# fell through to the blanket 0 fill below); the duplicated line is treated
# as a typo for "Bus_B2".
for id_col in ("Service_Date_B1", "Bus_B1", "Bus_B2"):
    observed_counts = model_df_2[id_col].value_counts()
    # value_counts() skips nulls, so it is empty when the whole column is
    # null; idxmax() would raise on that, so leave such a column to the
    # generic fill below.
    if not observed_counts.empty:
        model_df_2[id_col] = model_df_2[id_col].fillna(observed_counts.idxmax())

# Every remaining null (e.g. the price columns) becomes 0.
model_df_2 = model_df_2.fillna(0)
def _backfill_zero_prices(frame, price_col, group_col="Service_Date_B1"):
    """Return ``frame[price_col]`` with zeros replaced by the next non-zero value.

    Rows are ordered by "RecordedAt_new" and processed separately for each
    value of ``group_col``. A zero takes the next non-zero value that follows
    it within its group; zeros with no later non-zero value stay 0. The result
    keeps the index of ``frame``, so it can be assigned straight back as a
    column.

    This replaces ``Series.replace(to_replace=0, method='bfill')``, whose
    ``method`` argument is deprecated in pandas.
    NOTE(review): unlike that call, a null already present in ``price_col``
    would also be backfilled here - the upstream ``fillna(0)`` is expected to
    have removed nulls before this runs.
    """
    ordered = frame.sort_values(by=["RecordedAt_new"])
    return ordered.groupby(group_col)[price_col].transform(
        lambda prices: prices.mask(prices == 0).bfill().fillna(0)
    )


# Zero prices are placeholders written by the earlier null fill, so replace
# them with the next recorded price for the same service date.
# The scratch frames test_a / test_f and the early model_df_3 of the original
# are dropped: nothing reads them before model_df_3 is rebuilt further down.
model_df_2["average_price_B1_new"] = _backfill_zero_prices(
    model_df_2, "average_price_s1_s2_filled_B1"
)
model_df_2["average_price_B2_new"] = _backfill_zero_prices(
    model_df_2, "average_price_s1_s2_filled_B2"
)
from scipy.stats import hmean

## Price change of each bus relative to the other, and its harmonic mean.
# Drop rows where either backfilled price is still 0 BEFORE dividing, so the
# ratios below never contain inf/NaN from a zero denominator. The original
# filtered the same rows out after computing the ratios, so the surviving
# rows and their values are unchanged. copy() makes the result an independent
# frame that can safely receive new columns.
has_both_prices = (model_df_2["average_price_B1_new"] != 0) & (
    model_df_2["average_price_B2_new"] != 0
)
model_df_2 = model_df_2[has_both_prices].copy()

# Absolute price gap, expressed relative to each bus's own price.
price_gap = (
    model_df_2["average_price_B1_new"] - model_df_2["average_price_B2_new"]
).abs()
model_df_2["price_cng_b1"] = price_gap / model_df_2["average_price_B1_new"]
model_df_2["price_cng_b2"] = price_gap / model_df_2["average_price_B2_new"]

# Row-wise harmonic mean of the two relative changes. Columns are selected by
# name rather than by position (iloc[:, 8:10]), and hmean is called through
# the name that is actually imported above (the original referenced
# scipy.stats.hmean without importing scipy).
hm = hmean(model_df_2[["price_cng_b1", "price_cng_b2"]].to_numpy(), axis=1)
model_df_2["harm_mean_price_cng"] = hm

# Spread of the harmonic mean relative to its minimum.
# NOTE(review): this is inf/NaN when the minimum is 0 (both buses priced
# identically at some timestamp) - confirm that is acceptable.
display((hm.max() - hm.min()) / hm.min())
print("======================================================================================================")

# Zero-price rows are already gone, so no further row filter is needed here.
model_df_3 = model_df_2[["price_cng_b1", "price_cng_b2"]]
model_df_3.plot()
plt.show()
# Fit a linear regression of bus 2's relative price change (target) on
# bus 1's relative price change (single feature).
# Double-bracket selection keeps each column two-dimensional, i.e. shape
# (n_rows, 1), which is the same layout reshape(-1, 1) produced.
feature_matrix = model_df_2[["price_cng_b1"]].to_numpy()
target_matrix = model_df_2[["price_cng_b2"]].to_numpy()

regr = linear_model.LinearRegression()
regr.fit(feature_matrix, target_matrix)

# Fitted coefficient(s) of the regression.
print("Coefficients: \n", regr.coef_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment