Last active
March 12, 2021 09:33
-
-
Save Proteusiq/74166ba47fa14369af354f66192caf3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pytrends.request import TrendReq | |
from sklearn.ensemble import GradientBoostingRegressor | |
from sklearn.neighbors import KNeighborsRegressor | |
from sktime.forecasting.base import ForecastingHorizon | |
from sktime.forecasting.compose import EnsembleForecaster, ReducedForecaster | |
from sktime.forecasting.model_selection import temporal_train_test_split | |
from sktime.performance_metrics.forecasting import smape_loss | |
from sktime.utils.plotting import plot_series | |
# Forecast Google Trends interest in "cyberbullying" with an ensemble of
# reduction-based regressors, report the sMAPE on a hold-out window, and save
# a plot of the train / test / predicted series to trends.png.

KEYWORD = "cyberbullying"  # search term requested from Google Trends
TEST_SIZE = 36  # trailing observations held out for evaluation
WINDOW_LENGTH = 52  # lagged observations handed to each regressor

# fetch cyberbullying data from Google Trends
pytrend = TrendReq(hl="en-US")
pytrend.build_payload(kw_list=[KEYWORD])
cyberbullying_df = pytrend.interest_over_time()

# Fail with a readable message instead of a KeyError on the column lookup
# below when Google Trends returns no rows for the request.
if cyberbullying_df.empty:
    raise RuntimeError(f"Google Trends returned no data for {KEYWORD!r}")

# NOTE(review): pytrends presumably flags the most recent, incomplete
# interval in an "isPartial" column; consider dropping that row before
# fitting -- confirm against the installed pytrends version.

# transform the DataFrame column into a univariate series with a weekly
# PeriodIndex
fow = cyberbullying_df[KEYWORD].to_period(freq="W")

# hold out the last TEST_SIZE observations and forecast exactly those periods
y_train, y_test = temporal_train_test_split(fow, test_size=TEST_SIZE)
fh = ForecastingHorizon(y_test.index, is_relative=False)

# Both ensemble members share the same reduction settings; only the wrapped
# regressor differs.
reduction_kwargs = {
    "window_length": WINDOW_LENGTH,
    "strategy": "recursive",
    "scitype": "regressor",
}

# forecaster ensemble of knn and gradient boosting regressor
forecaster = EnsembleForecaster(
    [
        (
            "knn",
            ReducedForecaster(
                regressor=KNeighborsRegressor(n_neighbors=1),
                **reduction_kwargs,
            ),
        ),
        (
            "gboost",
            ReducedForecaster(
                # fixed seed keeps the boosted model reproducible across runs
                regressor=GradientBoostingRegressor(n_estimators=100, random_state=42),
                **reduction_kwargs,
            ),
        ),
    ]
)

# train the ensemble forecaster and predict over the hold-out horizon
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)

# check our forecaster model's performance - the lower the better
summary_metrics = (
    f"Symmetric Mean Absolute Percentage Error Loss: {smape_loss(y_test, y_pred):.3f}\n"
)
print(summary_metrics)

# generate a plot and write it next to the script
fig, ax = plot_series(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
ax.set_title(f"Cyberbullying Forecasting\n{summary_metrics}")
fig.autofmt_xdate()
fig.savefig("trends.png", bbox_inches="tight")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment