Skip to content

Instantly share code, notes, and snippets.

View AVJdataminer's full-sized avatar
🎯
Focusing

Aiden V Johnson AVJdataminer

🎯
Focusing
View GitHub Profile
@AVJdataminer
AVJdataminer / Fill_NA.R
Created July 27, 2017 23:58
Fill NA's in R
Fill_NA<-function(x,home,modelpath){
setwd(modelpath)
if(nrow(x)<1){x<-read.csv(list.files(pattern = "indata_file.csv"))}
#split into numeric and non-numeric
require(PCAmixdata)
ds=splitmix(x)
impute.med <- function(x) {
z <- median(x, na.rm = TRUE)
x[is.na(x)] <- z
"1BuUfzs5npAQuTKEvynYNkA5eRDpu3WTL5.id" is my Blockstack ID. https://onename.com/1BuUfzs5npAQuTKEvynYNkA5eRDpu3WTL5
@AVJdataminer
AVJdataminer / Time_2_single_cols
Last active April 14, 2019 14:11
time to day of year
import datetime
def time_stamp(df, time_col):
    """Expand a timestamp column into year, month, day and day-of-year columns.

    The frame is modified in place: ``time_col`` is converted with
    ``pd.to_datetime`` and the columns ``year``, ``month``, ``day`` and
    ``day_of_yr`` are added (or overwritten if they already exist).
    Nothing is returned.

    Args:
        df: pandas DataFrame that contains ``time_col``.
        time_col: Name of the column holding the timestamps to parse.
    """
    df[time_col] = pd.to_datetime(df[time_col])
    # Grab the accessor once, before any new column is written, so a
    # time_col that happens to be called "year"/"month"/"day" still works.
    stamps = df[time_col].dt
    df["year"] = stamps.year
    df["month"] = stamps.month
    df["day"] = stamps.day
    # dayofyear replaces strftime('%j').astype(int): the string round-trip
    # raises on missing timestamps, whereas the accessor yields NaN for them.
    df['day_of_yr'] = stamps.dayofyear
time_stamp(df,"Latest Launch")
@AVJdataminer
AVJdataminer / get_dummies_object_types
Created April 14, 2019 14:30
Get dummy columns for object-type columns only and concatenate them to the DataFrame
# One-hot encode only the object-dtype columns; every other column is kept as is.
dfo = df.select_dtypes(include=['object'])
df = pd.concat(
    [
        df.drop(columns=dfo.columns),  # the original frame minus the object columns
        pd.get_dummies(dfo),           # indicator columns that replace them
    ],
    axis=1,
)
@AVJdataminer
AVJdataminer / Train_test_scale
Created April 15, 2019 00:05
train and test split followed by scaler applied to both
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Discard every row that has a missing value before splitting.
df = df.dropna()

# Target is kept as a one-column DataFrame; the predictors are everything else.
y = df[['4-year resale value']]
X = df.drop(columns=['4-year resale value'])

# Hold out 25% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

# The scaler is fitted on the training rows only and then applied to them.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_scaled = scaler.transform(X_train)
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
normalize=False,
import sklearn.decomposition
# Fit a PCA with default settings on X, then keep the per-component
# explained-variance ratios reported by the fitted model.
pca = sklearn.decomposition.PCA().fit(X)
variances = pca.explained_variance_ratio_
def select_n_components(var_ratio, goal_var: float) -> int:
total_variance = 0.0
n_components = 0
# For the explained variance of each feature:
for explained_variance in var_ratio:
# Add the explained variance to the total
@AVJdataminer
AVJdataminer / Read_files_R.R
Created June 27, 2019 21:48
Read in multiple files in R
# Read every .txt file in the working directory and stack the tables row-wise.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; the previous ".*.txt" also matched names that
# merely contain "txt" after some character (e.g. "old_txt_dump.csv").
filelist = list.files(pattern = "\\.txt$")
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
datalist = lapply(filelist, function(x) read.table(x, header = TRUE))
#assuming the same header/columns for all files
datafr = do.call("rbind", datalist)
def ext_date(df, column):
    """Rewrite a ``YYYY-MM-DD`` date column as compact ``YYYYMMDD`` strings.

    The column is parsed with the explicit ``%Y-%m-%d`` format and then
    formatted back to text; ``df`` is modified in place and nothing is
    returned.

    Args:
        df: pandas DataFrame that contains ``column``.
        column: Name of the date column to reformat.
    """
    parsed = pd.to_datetime(df[column], format="%Y-%m-%d")
    df[column] = parsed.dt.strftime('%Y%m%d')
from datetime import datetime,timedelta
def dt_splitter(date_col, X, y, test_size):
xw_date=pd.DataFrame(X).merge(date_col,left_index=True, right_index=True)
ad = (max(xw_date.date)- min(xw_date.date)).days*test_size
split_date = min(xw_date.date) + timedelta(days=ad)
X_train = xw_date.loc[xw_date['date'] <= split_date].drop(['date'], axis=1).values
X_test = xw_date.loc[xw_date['date'] > split_date].drop(['date'], axis=1).values
yw_date=pd.DataFrame(y).merge(date_col,left_index=True, right_index=True)
y_train=yw_date.loc[yw_date['date'] <= split_date].drop(['date'], axis=1).values
y_test=yw_date.loc[yw_date['date'] > split_date].drop(['date'], axis=1).values