Skip to content

Instantly share code, notes, and snippets.

View AVJdataminer's full-sized avatar
🎯
Focusing

Aiden V Johnson AVJdataminer

🎯
Focusing
View GitHub Profile
def github_to_raw(url):
    """Convert a github.com file URL to its raw.githubusercontent.com form.

    Drops the '/blob' path segment and swaps the host, e.g.
    https://github.com/u/repo/blob/master/f.py ->
    https://raw.githubusercontent.com/u/repo/master/f.py

    Parameters
    ----------
    url : str
        A github.com URL pointing at a file (the "blob" view).

    Returns
    -------
    str
        The corresponding raw-content URL.
    """
    return url.replace('/blob', '').replace('github.com', 'raw.githubusercontent.com')


if __name__ == "__main__":
    # Interactive use only; guarded so importing this module has no side effects.
    input_url = input("Enter the github url: ")
    print("raw url: ", github_to_raw(input_url))
# Make a function to find the MSE of a single ARIMA model
def evaluate_arima_model(data, arima_order):
# Needs to be an integer because it is later used as an index.
split=int(len(data) * 0.8)
train, test = data[0:split], data[split:len(data)]
past=[x for x in train]
# make predictions
predictions = list()
for i in range(len(test)):#timestep-wise comparison between test data and one-step prediction ARIMA model.
model = ARIMA(past, order=arima_order)
import PIL
from PIL import Image
import os
import sys
# Placeholder directory -- must be replaced with a real path before running,
# otherwise os.listdir raises FileNotFoundError at import time.
path = "/path/to/file"
# Bare filenames (not full paths) of the PNG files in `path`.
# NOTE(review): endswith('.png') is case-sensitive, so '.PNG' files are skipped.
dirs = [file for file in os.listdir(path) if file.endswith('.png')]
# Commented-out leftover from the original gist (intended resize target?).
#pixelsize = 500;
def new_dt_split(date_col, X, y, input_date):
    """Split X and y into chronological train/test sets at a cutoff date.

    Rows dated on or before `input_date` go to train; rows strictly after
    it go to test. (The original used `>=` for the test mask, so rows
    falling exactly on `input_date` appeared in BOTH sets -- data leakage;
    the strict `>` also matches the sibling `dt_splitter` helper.)

    Parameters
    ----------
    date_col : pandas Series of dates, index-aligned with X and y; assumed
        to be named 'date' so the merged frame exposes a 'date' column
        -- TODO confirm against callers.
    X, y : array-like
        Feature matrix and target, row-aligned with `date_col`.
    input_date : date-like
        Cutoff; anything pandas can compare against datetimes.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy arrays.
    """
    date_col = pd.to_datetime(date_col)
    xw_date = pd.DataFrame(X).merge(date_col, left_index=True, right_index=True)
    X_train = xw_date.loc[xw_date['date'] <= input_date].drop(['date'], axis=1).values
    X_test = xw_date.loc[xw_date['date'] > input_date].drop(['date'], axis=1).values
    yw_date = pd.DataFrame(y).merge(date_col, left_index=True, right_index=True)
    y_train = yw_date.loc[yw_date['date'] <= input_date].drop(['date'], axis=1).values
    y_test = yw_date.loc[yw_date['date'] > input_date].drop(['date'], axis=1).values
    return X_train, X_test, y_train, y_test
from sklearn.model_selection import train_test_split
# Stratified 75/25 random split: class proportions of df['Country'] are
# preserved in both halves. Assumes X, y, and df (defined elsewhere) are
# row-aligned -- TODO confirm against the surrounding notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y,
stratify=df['Country'],
test_size=0.25)
from datetime import datetime,timedelta
def dt_splitter(date_col, X, y, test_size):
    """Chronologically split X/y so the most recent `test_size` fraction of
    the date range is held out as the test set.

    Fixes vs. the original gist: (1) the function now RETURNS its splits --
    it previously computed all four arrays and returned None, making it
    useless to callers; (2) the cutoff is placed at the (1 - test_size)
    point of the date span, so `test_size` means the TEST fraction, matching
    sklearn's train_test_split convention (the old code used it as the
    train fraction). No caller could have depended on the old behavior
    since nothing was returned.

    Parameters
    ----------
    date_col : pandas Series of datetimes, index-aligned with X and y;
        assumed to be named 'date' -- TODO confirm against callers.
    X, y : array-like
        Feature matrix and target, row-aligned with `date_col`.
    test_size : float
        Fraction in (0, 1) of the date span to reserve for the test set.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy arrays.
    """
    xw_date = pd.DataFrame(X).merge(date_col, left_index=True, right_index=True)
    # Cutoff sits (1 - test_size) of the way through the observed date span;
    # .days truncates sub-day resolution, as in the original.
    span_days = (max(xw_date.date) - min(xw_date.date)).days
    split_date = min(xw_date.date) + timedelta(days=span_days * (1 - test_size))
    X_train = xw_date.loc[xw_date['date'] <= split_date].drop(['date'], axis=1).values
    X_test = xw_date.loc[xw_date['date'] > split_date].drop(['date'], axis=1).values
    yw_date = pd.DataFrame(y).merge(date_col, left_index=True, right_index=True)
    y_train = yw_date.loc[yw_date['date'] <= split_date].drop(['date'], axis=1).values
    y_test = yw_date.loc[yw_date['date'] > split_date].drop(['date'], axis=1).values
    return X_train, X_test, y_train, y_test
def ext_date(df, column):
    """Rewrite `column` of `df` in place from 'YYYY-MM-DD' date strings to
    compact 'YYYYMMDD' strings.

    Mutates `df` and returns None. Raises ValueError if any entry does not
    match the '%Y-%m-%d' format.
    """
    parsed = pd.to_datetime(df[column], format="%Y-%m-%d")
    df[column] = parsed.dt.strftime('%Y%m%d')
@AVJdataminer
AVJdataminer / Read_files_R.R
Created June 27, 2019 21:48
Read in multiple files in R
# Read every *.txt file in the working directory and row-bind them into a
# single data frame; assumes all files share the same header/columns.
# Pattern fix: list.files(pattern=) takes a regex, and the original ".*.txt"
# was unanchored with unescaped dots, so it also matched names merely
# containing "txt" (e.g. "notes.txt.bak", "atxtb"). "\\.txt$" matches the
# literal ".txt" extension at the end of the name only.
filelist = list.files(pattern = "\\.txt$")
datalist = lapply(filelist, function(x) read.table(x, header = TRUE))
datafr = do.call("rbind", datalist)
import sklearn.decomposition
# Fit a full PCA (all components retained) on the feature matrix X,
# which is defined elsewhere in the notebook -- presumably rows are
# samples and columns are features; verify against the caller.
pca = sklearn.decomposition.PCA()
pca.fit(X)
# Fraction of total variance explained by each principal component,
# in decreasing order; sums to 1.0 when all components are kept.
variances = pca.explained_variance_ratio_
def select_n_components(var_ratio, goal_var: float) -> int:
total_variance = 0.0
n_components = 0
# For the explained variance of each feature:
for explained_variance in var_ratio:
# Add the explained variance to the total
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
normalize=False,