Skip to content

Instantly share code, notes, and snippets.

@michelkana
Created June 13, 2020 23:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save michelkana/c6528bcac94fcb5feabe32c7e666a88c to your computer and use it in GitHub Desktop.
Save michelkana/c6528bcac94fcb5feabe32c7e666a88c to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import r2_score
# importing R functions
#!pip install rpy2
import rpy2.robjects as robjects
r_predict = robjects.r["predict"]
r_lm = robjects.r["lm"]
df_r2_scores = pd.DataFrame({'R2':[]})
def fit_polynomial(
    order,
    df_train_summary=df_train_summary,
    df_test_summary=df_test_summary,
    title='model',
):
    """Fit a polynomial regression in R on the train data, plot it and score it on test.

    The model ``price ~ poly(date, order, raw=TRUE)`` is fitted with R's ``lm``
    through rpy2. Two panels are drawn side by side:

    * left: training points plus the fitted curve evaluated on a 100-point
      grid spanning 0..364;
    * right: test points plus the fitted values at the test dates.

    The R2 on the test data is printed and written to the module-level
    ``df_r2_scores`` table under the row label ``title``. Nothing is returned.

    Args:
        order: Degree of the polynomial in ``date`` (inserted into the R formula).
        df_train_summary: Data used for fitting. Its ``date`` and ``price``
            attributes are read and ``.plot.scatter`` is called on it
            (presumably a pandas DataFrame - confirm against the caller).
        df_test_summary: Held-out data, accessed the same way as the train data.
        title: Label used in both plot titles, in the printed report and as the
            row key in ``df_r2_scores``.

    Note:
        The defaults for the two data arguments are evaluated once, when this
        ``def`` statement runs. Re-assigning the global frames afterwards does
        not change the defaults; pass the frames explicitly in that case.
    """
    # populating the dataframe R understands
    train_r = robjects.DataFrame({
        'date': robjects.FloatVector(df_train_summary.date),
        'price': robjects.FloatVector(df_train_summary.price),
    })
    # populating the formula R understands
    formula = robjects.Formula("price ~ poly(date, {}, raw=TRUE)".format(order))
    # running the model in R
    bookings_lm = r_lm(formula=formula, data=train_r)

    # prediction line on an evenly spaced grid
    # NOTE(review): 0..364 looks like a day-of-year range - confirm against the data
    grid_dates = np.linspace(0, 364, 100)
    grid_r = robjects.DataFrame({'date': robjects.FloatVector(grid_dates)})
    grid_fitted = np.asarray(r_predict(bookings_lm, grid_r), dtype=float)

    # predictions at the test dates, kept in the same row order as df_test_summary
    test_r = robjects.DataFrame({'date': robjects.FloatVector(df_test_summary.date)})
    test_fitted = np.asarray(r_predict(bookings_lm, test_r), dtype=float)

    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # train panel: scatter of the data with the fitted curve on top
    df_train_summary.plot.scatter(
        x='date', y='price', c='Red',
        title="{} - Bookings train data".format(title), ax=axes[0],
    )
    axes[0].set_xlabel("date")
    axes[0].set_ylabel("price")
    axes[0].plot(grid_dates, grid_fitted)

    # test panel: scatter of the data with the fitted values on top
    df_test_summary.plot.scatter(
        x='date', y='price', c='Green',
        title="{} - Bookings test data".format(title), ax=axes[1],
    )
    axes[1].set_xlabel("date")
    axes[1].set_ylabel("price")
    # Draw the line in increasing date order; plotting in raw row order makes
    # the line jump back and forth when the test rows are not sorted by date.
    test_dates = np.asarray(df_test_summary.date, dtype=float)
    by_date = np.argsort(test_dates, kind='stable')
    axes[1].plot(test_dates[by_date], test_fitted[by_date])

    # computing R2 score (row order of prices and predictions matches)
    r2 = r2_score(df_test_summary.price, test_fitted)
    print(title)
    print("R2 on test: {}".format(r2))
    df_r2_scores.loc[title] = r2
# Fit and report one model per polynomial order, from mild to extreme.
for poly_order, model_title in (
    (5, '5-order Polynomial'),
    (25, '25-order Polynomial'),
    (50, '50-order Polynomial'),
):
    fit_polynomial(poly_order, title=model_title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment