This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Matches "github.com/<owner>/<repo>/blob/" so that only the view marker that
# directly follows the repository name is removed.
_BLOB_SEGMENT = re.compile(r'(github\.com/[^/]+/[^/]+)/blob/')


def github_to_raw_url(url):
    """Convert a GitHub file-view ("blob") URL into its raw-content URL.

    Args:
        url: Address of a file as shown in the GitHub web UI, e.g.
            ``https://github.com/owner/repo/blob/main/file.py``.
            Surrounding whitespace is ignored.

    Returns:
        The URL with the ``blob`` path segment dropped and the host switched
        to ``raw.githubusercontent.com``.
    """
    # Drop only the "/blob/" marker after owner/repo; a blanket
    # replace('/blob', '') would also mangle paths such as ".../blobs/x.py"
    # or an owner that happens to be called "blob".
    without_marker = _BLOB_SEGMENT.sub(r'\1/', url.strip(), count=1)
    # Swap the host once, so a later "github.com" inside the path is kept.
    return without_marker.replace('github.com', 'raw.githubusercontent.com', 1)


if __name__ == "__main__":
    input_url = input("Enter the github url: ")
    x = github_to_raw_url(input_url)
    print("raw url: ", x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a function to find the MSE of a single ARIMA model | |
def evaluate_arima_model(data, arima_order): | |
# Needs to be an integer because it is later used as an index. | |
split=int(len(data) * 0.8) | |
train, test = data[0:split], data[split:len(data)] | |
past=[x for x in train] | |
# make predictions | |
predictions = list() | |
for i in range(len(test)):#timestep-wise comparison between test data and one-step prediction ARIMA model. | |
model = ARIMA(past, order=arima_order) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys

import PIL
from PIL import Image

# Directory that is scanned for images.
path = "/path/to/file"
# Names (not full paths) of the PNG files found directly inside `path`.
# os.listdir returns entries in arbitrary order, so sort them to get the same
# processing order on every run.
dirs = sorted(file for file in os.listdir(path) if file.endswith('.png'))
#pixelsize = 500;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def new_dt_split(date_col, X, y, input_date):
    """Split X and y into train/test parts around a cut-off date.

    Rows are matched to their dates by index (inner join), so ``date_col``
    must share its index with ``X`` and ``y`` and must be named ``'date'``.

    Args:
        date_col: Dates for each row; converted with ``pd.to_datetime``.
        X: Feature data accepted by ``pd.DataFrame``.
        y: Target data accepted by ``pd.DataFrame``.
        input_date: Cut-off date. Rows dated on or before it go to the
            training set, rows dated strictly after it go to the test set.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays with the
        date column removed.
    """
    date_col = pd.to_datetime(date_col)
    cutoff = pd.to_datetime(input_date)

    def _partition(values):
        # Attach the dates, cut at `cutoff`, then drop the helper column.
        with_date = pd.DataFrame(values).merge(
            date_col, left_index=True, right_index=True
        )
        in_train = with_date['date'] <= cutoff
        # Strictly after the cut-off: rows dated exactly `input_date` must not
        # appear in both splits, which would leak training rows into the test.
        in_test = with_date['date'] > cutoff
        payload = with_date.drop(['date'], axis=1)
        return payload.loc[in_train].values, payload.loc[in_test].values

    X_train, X_test = _partition(X)
    y_train, y_test = _partition(y)
    return X_train, X_test, y_train, y_test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set, passing the 'Country' column of
# `df` as the stratification labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=df['Country'],
    test_size=0.25,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta


def dt_splitter(date_col, X, y, test_size):
    """Split X and y chronologically, holding out the latest dates for testing.

    Rows are matched to their dates by index (inner join), so ``date_col``
    must share its index with ``X`` and ``y`` and provide a column named
    ``'date'`` holding datetime values.

    Args:
        date_col: Dates for each row.
        X: Feature data accepted by ``pd.DataFrame``.
        y: Target data accepted by ``pd.DataFrame``.
        test_size: Fraction (0-1) of the covered date range, measured in
            whole days, that is reserved for the test set.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays with the
        date column removed.
    """
    xw_date = pd.DataFrame(X).merge(date_col, left_index=True, right_index=True)
    first_date = xw_date['date'].min()
    span_days = (xw_date['date'].max() - first_date).days
    # The training window is the first (1 - test_size) share of the range.
    # Offsetting by span * test_size would hand that share to the training
    # set instead of the test set.
    split_date = first_date + timedelta(days=span_days * (1 - test_size))

    def _partition(with_date):
        # Rows up to and including the split date train; later rows test.
        payload = with_date.drop(['date'], axis=1)
        in_train = with_date['date'] <= split_date
        in_test = with_date['date'] > split_date
        return payload.loc[in_train].values, payload.loc[in_test].values

    X_train, X_test = _partition(xw_date)
    yw_date = pd.DataFrame(y).merge(date_col, left_index=True, right_index=True)
    y_train, y_test = _partition(yw_date)
    return X_train, X_test, y_train, y_test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ext_date(df, column):
    """Rewrite ``df[column]`` in place from 'YYYY-MM-DD' dates to 'YYYYMMDD' strings.

    Args:
        df: DataFrame to modify; the column is overwritten on this object.
        column: Name of the column holding dates in ``%Y-%m-%d`` format.

    Returns:
        None. The frame is mutated in place.
    """
    # Parse with an explicit format so values in any other layout are rejected
    # by pandas instead of being guessed at.
    parsed = pd.to_datetime(df[column], format="%Y-%m-%d")
    df[column] = parsed.dt.strftime('%Y%m%d')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read every .txt file in the working directory and stack the tables by row.
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".txt" match (".*.txt" also matched e.g. "notxt.bak").
filelist <- list.files(pattern = "\\.txt$")
# Spell out TRUE; `T` is an ordinary variable that can be reassigned.
datalist <- lapply(filelist, function(x) read.table(x, header = TRUE))
# assuming the same header/columns for all files
datafr <- do.call("rbind", datalist)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn.decomposition | |
pca = sklearn.decomposition.PCA() | |
pca.fit(X) | |
variances = pca.explained_variance_ratio_ | |
def select_n_components(var_ratio, goal_var: float) -> int: | |
total_variance = 0.0 | |
n_components = 0 | |
# For the explained variance of each feature: | |
for explained_variance in var_ratio: | |
# Add the explained variance to the total |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import svm, datasets | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import confusion_matrix | |
def plot_confusion_matrix(cm, classes, | |
normalize=False, |
Newer Older