Created
June 13, 2020 23:11
-
-
Save michelkana/c6528bcac94fcb5feabe32c7e666a88c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
%matplotlib inline | |
from sklearn.metrics import r2_score | |
# importing R functions | |
#!pip install rpy2 | |
import rpy2.robjects as robjects | |
# Handles to the R functions used for fitting and prediction, looked up
# through rpy2's global R environment accessor.
r_predict, r_lm = (robjects.r[r_name] for r_name in ("predict", "lm"))

# Accumulator for the test R2 of every fitted model; rows are added by label.
df_r2_scores = pd.DataFrame({'R2': []})
# function that fits a linear model on training dataset, plots the regression line and report the R2 on test
# The 'order' parameter can be specified to add higher order polynomial terms
def fit_polynomial(order, df_train_summary=df_train_summary, df_test_summary=df_test_summary, title='model'):
    """Fit a polynomial regression of price on date in R and report the test R2.

    Fits ``price ~ poly(date, order, raw=TRUE)`` with R's ``lm`` on the
    training frame, draws the train and test scatter plots side by side with
    the fitted curve on each, prints the R2 obtained on the test frame and
    stores it in the module-level ``df_r2_scores`` under the row label
    ``title``.

    Args:
        order: Degree handed to R's ``poly()``.
        df_train_summary: Frame with ``date`` and ``price`` columns used to
            fit the model. The default is the module-level object of the same
            name as it existed when this function was defined.
        df_test_summary: Frame with ``date`` and ``price`` columns used for
            scoring. The default is bound the same way as the training frame.
        title: Label used in the plot titles, printed before the score, and
            used as the row label in ``df_r2_scores``.
    """
    # populating the R data frame used for fitting
    bookings_train_r = robjects.DataFrame({
        'date': robjects.FloatVector(df_train_summary.date),
        'price': robjects.FloatVector(df_train_summary.price),
    })
    # populating formula R understands
    simple_formula = robjects.Formula("price ~ poly(date, {}, raw=TRUE)".format(order))
    # running the model in R
    bookings_lm = r_lm(formula=simple_formula, data=bookings_train_r)

    # prediction line on a fixed, evenly spaced grid
    # (0..364 is presumably day of year - confirm against the data preparation)
    grid_dates = np.linspace(0, 364, 100)
    predict_df = robjects.DataFrame({'date': robjects.FloatVector(grid_dates)})
    # convert the R vector to a NumPy array before handing it to matplotlib
    predictions = np.asarray(r_predict(bookings_lm, predict_df), dtype=float)

    # plotting train data with the fitted curve
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    df_train_summary.plot.scatter(x='date', y='price', c='Red', title="{} - Bookings train data".format(title), ax=ax[0])
    ax[0].set_xlabel("date")
    ax[0].set_ylabel("price")
    ax[0].plot(grid_dates, predictions)

    # test predictions in R, brought back as a NumPy array in the row order
    # of df_test_summary so they line up with df_test_summary.price below
    test_dates = np.asarray(df_test_summary.date, dtype=float)
    predict_test_df = robjects.DataFrame({'date': robjects.FloatVector(test_dates)})
    predictions_test = np.asarray(r_predict(bookings_lm, predict_test_df), dtype=float)

    # plotting test data with the predicted curve
    df_test_summary.plot.scatter(x='date', y='price', c='Green', title="{} - Bookings test data".format(title), ax=ax[1])
    ax[1].set_xlabel("date")
    ax[1].set_ylabel("price")
    # draw the line through the points in increasing date order; connecting
    # them in row order would zigzag whenever the test frame is not sorted
    by_date = np.argsort(test_dates, kind='stable')
    ax[1].plot(test_dates[by_date], predictions_test[by_date])

    # computing R2 score on the test data (row order, not the sorted order)
    r2 = r2_score(df_test_summary.price, predictions_test)
    print(title)
    print("R2 on test: {}".format(r2))
    df_r2_scores.loc[title] = r2
# Fit and score models of increasing polynomial order, one figure per model.
for poly_order, model_title in (
    (5, '5-order Polynomial'),
    (25, '25-order Polynomial'),
    (50, '50-order Polynomial'),
):
    fit_polynomial(poly_order, title=model_title)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment