This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("factoextra",lib.loc = "/mnt/distvol/R-4.0.5/library") | |
library("FactoMineR",lib.loc = "/mnt/distvol/R-4.0.5/library") | |
library("reshape2",lib.loc = "/mnt/distvol/R-4.0.5/library") | |
library("data.table",lib.loc = "/mnt/distvol/R-4.0.5/library") | |
data <- read.csv("/mnt/distvol/pca_dist_scaled.csv", row.names=1) | |
data2 <- read.csv("/mnt/distvol/states.csv", row.names=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#power = PowerTransformer(method='box-cox') | |
def testNormal (x): | |
k2, p = stats.normaltest(x) | |
alpha = .001 | |
#print("p = {:g}".format(p)) | |
if p < alpha: # null hypothesis: x comes from a normal distribution | |
#print(p) | |
#print(alpha) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.neighbors import KernelDensity | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.model_selection import LeaveOneOut | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
def make_data(N, f=0.3, rseed=1): | |
rand = np.random.RandomState(rseed) | |
x = rand.randn(N) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def f3(Y):
    """Move the selection Y to the front of the filtered data and summarise it.

    Builds a frame whose leading column(s) are ``all_data[Y]`` followed by
    the columns of ``all_data`` named by the remaining entries of the
    module-level ``filter_``, displays its ``describe()`` summary, and
    returns the frame.

    Y is expected to be one of the entries of ``filter_``; ``remove`` is
    called on a copy of ``filter_`` so the shared object is left untouched.
    NOTE(review): presumably ``filter_`` is a list of column labels, in which
    case a Y that is not present raises ValueError -- confirm against caller.
    """
    # Copy first so removing Y does not alter the options offered elsewhere.
    remaining_columns = filter_.copy()
    remaining_columns.remove(Y)

    selected_part = all_data[Y]
    remaining_part = all_data[remaining_columns]
    reordered = pd.concat([selected_part, remaining_part], axis=1)

    display(reordered.describe())
    return reordered
out = interactive(f3, Y=filter_) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.formula.api as smf | |
import statsmodels.regression.quantile_regression as srq | |
#no ZCA until the end | |
#convert to t from s | |
t_ = pd.DataFrame(StandardScaler().fit_transform(transformed_yj)) | |
t_.columns = transformed.columns | |
t_.index = all_data.index | |
#from scipy.stats import chi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_sequences(sequences, n_steps_in, n_steps_out): | |
X, y = list(), list() | |
for i in range(len(sequences)): | |
# find the end of this pattern | |
end_ix = i + n_steps_in | |
out_end_ix = end_ix + n_steps_out | |
# check if we are beyond the dataset | |
if out_end_ix > len(sequences): | |
break |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import imgkit | |
def highlight_greaterthan(x): | |
if abs(x['Anomaly']) >= .6: | |
return ['background-color: purple']*12 | |
elif abs(x['Anomaly']) >= .55: | |
return ['background-color: red']*12 | |
elif abs(x['Anomaly']) >= .5: | |
return ['background-color: yellow']*12 | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#notebook: | |
#https://github.com/thistleknot/python-ml/blob/master/code/pcorr-significance.ipynb | |
import pandas as pd | |
import numpy as np | |
from scipy import stats # For in-built method to get PCC | |
import scipy | |
from sklearn.model_selection import KFold | |
import pingouin as pg | |
from sklearn.preprocessing import StandardScaler | |
scaler = StandardScaler() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fit_linear_reg(X,Y,train_i,test_i):
    """Fit a linear regression on the training rows and score it on the test rows.

    Parameters
    ----------
    X : object supporting ``.iloc`` positional row selection
        Predictor data.
    Y : object supporting ``.iloc`` positional row selection
        Response data, row-aligned with ``X``.
    train_i : positional indexer accepted by ``.iloc``
        Rows used to fit the model.
    test_i : positional indexer accepted by ``.iloc``
        Rows used to evaluate the model.

    Returns
    -------
    tuple
        ``(RSS, R_squared)`` where ``RSS`` is the residual sum of squares of
        the predictions on the test rows and ``R_squared`` is the value of
        the fitted model's ``score`` on the test rows.
    """
    X_train, Y_train = X.iloc[train_i], Y.iloc[train_i]
    X_test, Y_test = X.iloc[test_i], Y.iloc[test_i]

    model_k = linear_model.LinearRegression(fit_intercept = True)
    model_k.fit(X_train, Y_train)

    # RSS = MSE * n, where n must be the number of rows the MSE was averaged
    # over (the test rows). Scaling by len(Y) of the full dataset would
    # inflate the value by the ratio of dataset size to test-fold size.
    RSS = mean_squared_error(Y_test, model_k.predict(X_test)) * len(Y_test)
    R_squared = model_k.score(X_test, Y_test)
    return RSS, R_squared
# ransac regression on a dataset with outliers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://towardsdatascience.com/pipelines-custom-transformers-in-scikit-learn-the-step-by-step-guide-with-python-code-4a7d9b068156 | |
#https://github.com/HCGrit/MachineLearning-iamJustAStudent/blob/master/PipelineFoundation/Pipeline_Experiment.ipynb | |
import pandas as pd | |
from sklearn.compose import TransformedTargetRegressor | |
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline | |
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score, RepeatedKFold, train_test_split |