This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Fill_NA<-function(x,home,modelpath){ | |
setwd(modelpath) | |
if(nrow(x)<1){x<-read.csv(list.files(pattern = "indata_file.csv"))} | |
#split into numeric and non-numeric | |
require(PCAmixdata) | |
ds=splitmix(x) | |
impute.med <- function(x) { | |
z <- median(x, na.rm = TRUE) | |
x[is.na(x)] <- z |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1BuUfzs5npAQuTKEvynYNkA5eRDpu3WTL5.id" is my Blockstack ID. https://onename.com/1BuUfzs5npAQuTKEvynYNkA5eRDpu3WTL5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
def time_stamp(df, time_col):
    """Expand a date-like column into calendar feature columns, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It gains (or has overwritten) the columns
        ``year``, ``month``, ``day`` and ``day_of_yr``.
    time_col : str
        Name of the column to parse with ``pd.to_datetime``. The column
        itself is replaced by the parsed datetime values.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so that calls can be chained.
    """
    df[time_col] = pd.to_datetime(df[time_col])
    stamps = df[time_col].dt
    df["year"] = stamps.year
    df["month"] = stamps.month
    df["day"] = stamps.day
    # Read the ordinal day (1-366) straight from the accessor instead of
    # formatting with '%j' and casting back to int, which raised on NaT.
    df['day_of_yr'] = stamps.dayofyear
    return df
# Add year, month, day and day_of_yr columns to df from its "Latest Launch" column.
time_stamp(df,"Latest Launch")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode the object-typed columns: pull them out, drop them from df,
# and append their indicator columns in their place.
dfo = df.select_dtypes(include=['object'])
df = pd.concat(
    [df.drop(columns=dfo.columns), pd.get_dummies(dfo)],
    axis=1,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import numpy as np

# Discard rows with missing values, then separate the predictors from the
# '4-year resale value' target (kept as a one-column frame).
df = df.dropna()
y = df[['4-year resale value']]
X = df.drop(columns=['4-year resale value'])

# Reproducible split: 25% of the rows are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.25
)

# Learn the scaling statistics from the training rows only, then apply them
# to those same rows.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_scaled = scaler.transform(X_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import svm, datasets | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import confusion_matrix | |
def plot_confusion_matrix(cm, classes, | |
normalize=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn.decomposition | |
# Fit a PCA with default settings on X and keep the fraction of variance
# explained by each component.
pca = sklearn.decomposition.PCA().fit(X)
variances = pca.explained_variance_ratio_
def select_n_components(var_ratio, goal_var: float) -> int: | |
total_variance = 0.0 | |
n_components = 0 | |
# For the explained variance of each feature: | |
for explained_variance in var_ratio: | |
# Add the explained variance to the total |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read every .txt file in the working directory and stack them row-wise into
# a single data frame.
# The pattern is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; the previous ".*.txt" also matched names
# such as "notes_txt_old.csv".
filelist <- list.files(pattern = "\\.txt$")
datalist <- lapply(filelist, function(path) read.table(path, header = TRUE))
# Assumes the same header/columns for all files; rbind() errors otherwise.
# When no file matches, datalist is empty and datafr is NULL.
datafr <- do.call("rbind", datalist)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ext_date(df, column, in_format="%Y-%m-%d", out_format="%Y%m%d"):
    """Rewrite a date column as formatted date strings, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify.
    column : str
        Name of the column holding the dates to convert.
    in_format : str, optional
        ``strftime``-style format used to parse the existing values.
        Defaults to ``"%Y-%m-%d"``.
    out_format : str, optional
        ``strftime``-style format of the strings written back.
        Defaults to ``"%Y%m%d"``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so that calls can be chained.
    """
    parsed = pd.to_datetime(df[column], format=in_format)
    df[column] = parsed.dt.strftime(out_format)
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime,timedelta | |
def dt_splitter(date_col, X, y, test_size): | |
xw_date=pd.DataFrame(X).merge(date_col,left_index=True, right_index=True) | |
ad = (max(xw_date.date)- min(xw_date.date)).days*test_size | |
split_date = min(xw_date.date) + timedelta(days=ad) | |
X_train = xw_date.loc[xw_date['date'] <= split_date].drop(['date'], axis=1).values | |
X_test = xw_date.loc[xw_date['date'] > split_date].drop(['date'], axis=1).values | |
yw_date=pd.DataFrame(y).merge(date_col,left_index=True, right_index=True) | |
y_train=yw_date.loc[yw_date['date'] <= split_date].drop(['date'], axis=1).values | |
y_test=yw_date.loc[yw_date['date'] > split_date].drop(['date'], axis=1).values |
OlderNewer