# COVID-19 Predictor - Polynomial Regression
# COVID-19 cases predictor (polynomial regression for non-linear prediction)
# Data Source: https://ourworldindata.org/coronavirus-source-data
# Data Example
# id,cases
# 1,1
# 2,4
# 3,6
# 4,8
# 5,10
# 6,12
# 7,14
# 8,16
# 9,18
# 10,20
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures


def print_section(title):
    """Print *title* framed above and below by a 30-character dashed rule."""
    rule = '-' * 30
    print(rule)
    print(title)
    print(rule)


### Load Data ###
# Reads 'covid_cases.csv' from the current working directory and keeps only
# the two columns the model uses.
data = pd.read_csv('covid_cases.csv', sep=',')
data = data[['id', 'cases']]
num_rows = len(data.index)
print(f'Number of Rows: {num_rows}')
print_section(' Head ')
print(data.head())

### Prepare Data ###
print_section(' Prepare Data ')
# scikit-learn estimators expect 2-D inputs (one row per sample), hence the
# reshape to a single column.
x = np.array(data['id']).reshape(-1, 1)
y = np.array(data['cases']).reshape(-1, 1)
plt.plot(y, '-m')  # observed cases: solid magenta line
polyFeature = PolynomialFeatures(degree=3)  # change degree to improve results
# Fit the feature expansion once, on the training ids; from here on `x` holds
# the polynomial terms of `id` rather than the raw ids.
x = polyFeature.fit_transform(x)

### Training Data ###
print_section(' Training Data ')
model = linear_model.LinearRegression()
model.fit(x, y)
# NOTE: LinearRegression.score() returns R^2, and it is evaluated here on the
# same rows the model was fitted on, so the printed "Accuracy" is a goodness
# of fit on the training data, not a held-out accuracy.
accuracy = model.score(x, y)
print(f'Accuracy: {round(accuracy * 100, 3)} %')
y0 = model.predict(x)

### Prediction ###
days = 10
print_section(' Prediction ')
print(f'Prediction - Cases after {days} days: ', end='')
# Assumes ids run consecutively from 1 to num_rows (as in the data example),
# so the id `days` days past the last observation is num_rows + days.
target_day = num_rows + days
# Reuse the already-fitted transformer (transform, not fit_transform) so new
# inputs never refit it. predict() returns a (1, 1) array; .item() extracts
# the scalar explicitly because int() on a non-0-d array is deprecated in
# recent NumPy releases.
predicted = model.predict(polyFeature.transform([[target_day]]))
print(int(predicted.item()), 'cases')
# The upper bound is target_day + 1 so the plotted projection includes the
# day whose prediction was just printed (the end of a range is exclusive).
x1 = np.arange(1, target_day + 1).reshape(-1, 1)
y1 = model.predict(polyFeature.transform(x1))
plt.plot(y1, '--r')  # fitted curve extended over the forecast horizon
plt.plot(y0, '--b')  # fitted curve over the observed days
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment