Skip to content

Instantly share code, notes, and snippets.

View liannewriting's full-sized avatar

Lianne & Justin @ Just into Data liannewriting

View GitHub Profile
@liannewriting
liannewriting / transform_data.py
Last active December 9, 2022 16:03
xgboost python machine learning
# Columns the author judged to be less related to the target; they are dropped below.
cols_to_drop = ['duration', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed']
# Drop those columns and, in the same chained call, rename the remaining ones to more descriptive names.
# See the UCI page (https://archive.ics.uci.edu/ml/datasets/bank+marketing) for details on each column.
# NOTE(review): the rename mapping is cut off in this view (the dict and the call are never closed) - confirm the remaining entries against the full file.
df = df.drop(columns=cols_to_drop).rename(columns={'job': 'job_type', 'default': 'default_status',
'housing': 'housing_loan_status', 'loan': 'personal_loan_status',
'contact': 'contact_type', 'month': 'contact_month',
'day_of_week': 'contact_day_of_week', 'campaign': 'num_contacts',
'pdays': 'days_last_contact', 'previous': 'previous_contacts',
'poutcome': 'previous_outcome',
@liannewriting
liannewriting / load_data.py
Last active December 7, 2022 23:51
xgboost python machine learning
import pandas as pd

# Prerequisite: download bank-additional.zip and extract it next to this script.
# The file is semicolon-separated, so the separator is given explicitly.
csv_path = 'bank-additional/bank-additional/bank-additional-full.csv'
df = pd.read_csv(csv_path, sep=';')
@liannewriting
liannewriting / prediction_manual_auto_comparison.py
Created August 9, 2022 14:09
time series prediction arima model python
# Ask the auto-ARIMA model for one prediction per row of df_test.
n_test_periods = len(df_test)
forecast_test_auto = auto_arima.predict(n_periods=n_test_periods)

# Prefix the forecast with one None per training row before storing it as a
# new column (presumably df is df_train followed by df_test - verify).
train_padding = [None] * len(df_train)
df['forecast_auto'] = train_padding + list(forecast_test_auto)

# Plot every column of df, including the new forecast column.
df.plot()
@liannewriting
liannewriting / evaluation_auto.py
Created August 9, 2022 14:08
time series prediction arima model python
# Compare forecast_test_auto with df_test using three error metrics.
mae = mean_absolute_error(df_test, forecast_test_auto)
mape = mean_absolute_percentage_error(df_test, forecast_test_auto)
# RMSE is the square root of the mean squared error.
mse = mean_squared_error(df_test, forecast_test_auto)
rmse = np.sqrt(mse)

# Report the metrics in the order: mae, mape, rmse.
for metric_name, metric_value in (('mae', mae), ('mape', mape), ('rmse', rmse)):
    print(f'{metric_name} - auto: {metric_value}')
@liannewriting
liannewriting / evaluation_manual.py
Last active August 12, 2022 16:04
time series prediction arima model python
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Compare forecast_test with df_test using three error metrics.
mae = mean_absolute_error(df_test, forecast_test)
mape = mean_absolute_percentage_error(df_test, forecast_test)
# RMSE is the square root of the mean squared error.
mse = mean_squared_error(df_test, forecast_test)
rmse = np.sqrt(mse)

# Report the metrics in the order: mae, mape, rmse.
for metric_name, metric_value in (('mae', mae), ('mape', mape), ('rmse', rmse)):
    print(f'{metric_name} - manual: {metric_value}')
@liannewriting
liannewriting / auto_arima_summary.py
Created August 9, 2022 14:03
time series prediction arima model python
# Request the summary report of the auto_arima model object.
# As a bare expression, its result is only displayed in an interactive/notebook session.
auto_arima.summary()
@liannewriting
liannewriting / pmdarima_auto_arima.py
Last active August 18, 2022 15:31
time series prediction arima model python
import pmdarima as pm

# Let pmdarima select the ARIMA orders for df_train: the seasonal component
# is switched off and the stepwise search is disabled.
auto_arima = pm.auto_arima(
    df_train,
    seasonal=False,
    stepwise=False,
)
# Bare expression so an interactive session echoes the selected model.
auto_arima
@liannewriting
liannewriting / model_manual_forecast.py
Created August 9, 2022 13:54
time series prediction arima model python
# Forecast as many steps ahead as there are rows in df_test.
n_test_steps = len(df_test)
forecast_test = model_fit.forecast(n_test_steps)

# Prefix the forecast with one None per training row before storing it as a
# new column (presumably df is df_train followed by df_test - verify).
train_padding = [None] * len(df_train)
df['forecast_manual'] = train_padding + list(forecast_test)

# Plot every column of df, including the new forecast column.
df.plot()
@liannewriting
liannewriting / acf_pacf_residuals.py
Created August 9, 2022 13:53
time series prediction arima model python
# Draw the ACF plot first, then the PACF plot, for the residuals; keep both
# return values so they can be referenced later.
acf_res, pacf_res = plot_acf(residuals), plot_pacf(residuals)
@liannewriting
liannewriting / residual_plots.py
Created August 9, 2022 13:52
time series prediction arima model python
import matplotlib.pyplot as plt

# Take the fitted model's residuals, skipping the first entry.
residuals = model_fit.resid[1:]

# One row, two columns: line plot on the left, density (KDE) on the right.
fig, ax = plt.subplots(nrows=1, ncols=2)
residuals.plot(ax=ax[0], title='Residuals')
residuals.plot(ax=ax[1], kind='kde', title='Density')
plt.show()