Skip to content

Instantly share code, notes, and snippets.

View pkgandhi's full-sized avatar

Pratik Gandhi pkgandhi

  • Boston
View GitHub Profile
@pkgandhi
pkgandhi / AR_MA.py
Last active August 5, 2020 01:27
Fitting the AR and MA Models
# Imports: pandas for the data frame, SARIMAX from statsmodels
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Download the airline-passengers CSV and index it by the 'Month' column
data = pd.read_csv(
    'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
).set_index('Month')
@pkgandhi
pkgandhi / ADFTest.py
Last active August 9, 2020 17:27
Using ADF test to check stationarity
# Imports
import pandas as pd
import numpy as np
import statsmodels.tsa.stattools as sm
import matplotlib.pyplot as plt

# Use the 'fivethirtyeight' matplotlib style for any plots that follow
plt.style.use('fivethirtyeight')

# Read the dataset and rename the '#Passengers' column to 'Passengers'
data = pd.read_csv('../AirPassengers.csv').rename(
    columns={'#Passengers': 'Passengers'}
)
@pkgandhi
pkgandhi / PPTest.py
Created August 9, 2020 17:56
Using PP test to check stationarity
# Imports
import pandas as pd
import pmdarima

# Read the dataset, rename '#Passengers' to 'Passengers', and index by 'Month'
data = (
    pd.read_csv('../AirPassengers.csv')
    .rename(columns={'#Passengers': 'Passengers'})
    .set_index('Month')
)

# Conducting PP test:
@pkgandhi
pkgandhi / KPSSTest.py
Created August 9, 2020 18:43
Using KPSS test to check stationarity
# Imports
import pandas as pd
import numpy as np
import statsmodels.tsa.stattools as sm
import matplotlib.pyplot as plt

# Use the 'fivethirtyeight' matplotlib style for any plots that follow
plt.style.use('fivethirtyeight')

# Read the dataset and rename the '#Passengers' column to 'Passengers'
data = pd.read_csv('../AirPassengers.csv').rename(
    columns={'#Passengers': 'Passengers'}
)
@pkgandhi
pkgandhi / OCSBTest.py
Created August 9, 2020 21:18
Using OCSB test to determine if the series needs seasonal differencing (D)
# Imports
import pandas as pd
import pmdarima

# Read the dataset, rename '#Passengers' to 'Passengers', and index by 'Month'
data = (
    pd.read_csv('../AirPassengers.csv')
    .rename(columns={'#Passengers': 'Passengers'})
    .set_index('Month')
)

# Conducting OCSB test:
@pkgandhi
pkgandhi / decompose_plot.py
Created August 10, 2020 00:48
Decomposition Plot using statsmodels package
# Imports
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

# Read the dataset, rename '#Passengers' to 'Passengers', and index by 'Month'
data = (
    pd.read_csv('../AirPassengers.csv')
    .rename(columns={'#Passengers': 'Passengers'})
    .set_index('Month')
)
@pkgandhi
pkgandhi / ACF_PACF.py
Created August 10, 2020 01:32
Autocorrelation and Partial Autocorrelation Plot
# Loading the packages
import pandas as pd
# matplotlib.pyplot was missing: plt.style.use() below raised NameError without it
import matplotlib.pyplot as plt
from statsmodels.graphics import tsaplots

# Use the 'fivethirtyeight' matplotlib style for the plots that follow
plt.style.use('fivethirtyeight')

# Loading the dataset: rename '#Passengers' to 'Passengers' and index by 'Month'
data = pd.read_csv('../AirPassengers.csv')
data = data.rename(columns={'#Passengers': 'Passengers'})
data = data.set_index('Month')
@pkgandhi
pkgandhi / CHTest.py
Created August 10, 2020 14:48
Using CH Test to test a stable seasonal pattern
# Imports
import pandas as pd
import pmdarima

# Read the dataset, rename '#Passengers' to 'Passengers', and index by 'Month'
data = (
    pd.read_csv('../AirPassengers.csv')
    .rename(columns={'#Passengers': 'Passengers'})
    .set_index('Month')
)

# Conducting CH test:
@pkgandhi
pkgandhi / basic_optuna.py
Last active August 16, 2020 18:29
Setting up Basic Optuna
# Importing the packages: Optuna, pandas, and the scikit-learn modules
# (linear models, ensembles, bundled datasets, model selection).
import optuna
import pandas as pd
from sklearn import linear_model
from sklearn import ensemble
from sklearn import datasets
from sklearn import model_selection
# Load scikit-learn's breast-cancer classification dataset.
# return_X_y=True yields a (features, target) pair instead of a Bunch;
# as_frame=True requests pandas objects rather than NumPy arrays.
X,y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)
@pkgandhi
pkgandhi / study_results.py
Last active August 16, 2020 18:32
Getting the results from study object
# Print the best trial held by `study`.
# NOTE(review): `study` is not defined in this snippet; presumably an Optuna
# study created and optimized earlier — confirm against the full gist.
print(f"The best trial is : \n{study.best_trial}")
# >> Output:
#The best trial is :
#FrozenTrial(number=18, value=0.9631114824097281, datetime_start=datetime.datetime(2020, 8, 16, 14, 24, 37, 407344), datetime_complete=datetime.datetime(2020, 8, 16, 14, 24, 37, 675114), params={'classifier': 'RandomForest', 'rf_n_estimators': 153, 'rf_max_depth': 21},
#distributions={'classifier': CategoricalDistribution(choices=('LogReg', 'RandomForest')), 'rf_n_estimators': IntUniformDistribution(high=1000, low=10, step=1), 'rf_max_depth': IntLogUniformDistribution(high=32, low=2, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=18, state=TrialState.COMPLETE)
# Print the best objective value held by `study`
# (matches the `value` field of the best trial shown above).
print(f"The best value is : \n{study.best_value}")
# >> Output: