Skip to content

Instantly share code, notes, and snippets.

@ksv-muralidhar
ksv-muralidhar / stratified_cv1.py
Created February 16, 2021 02:55
stratified cv part 1
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, StratifiedKFold, StratifiedShuffleSplit, KFold
# Build a reproducible synthetic 3-class problem: 500 samples with 3 features
# (2 informative, 0 redundant) and a single cluster per class.
make_class = make_classification(
    n_samples=500,
    n_features=3,
    n_informative=2,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=11,
)
# make_class is an (X, y) pair: the features become integer-labelled columns
# and the class labels are appended as the last column, 'target'.
data = pd.DataFrame(
    make_class[0],
    columns=range(make_class[0].shape[1]),
).assign(target=make_class[1])
data.head()
@ksv-muralidhar
ksv-muralidhar / outlier_detect_1.py
Created February 16, 2021 03:24
outlier detection
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the wine data set once and reuse the returned bunch; calling
# load_wine() for every field re-reads the data set each time.
wine = load_wine()
# Input features as a data frame, one column per named feature.
data = pd.DataFrame(wine["data"], columns=wine["feature_names"])
data.head()
from sklearn.datasets import load_diabetes
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the diabetes data set once and reuse the returned bunch; calling
# load_diabetes() for every field re-reads the data set each time.
diabetes = load_diabetes()
# Store the input features in a data frame, one column per named feature.
data = pd.DataFrame(diabetes["data"], columns=diabetes["feature_names"])
# Add the target variable to the data frame.
data["target"] = diabetes["target"]
@ksv-muralidhar
ksv-muralidhar / py_plot_1.py
Last active February 16, 2021 04:24
python plots
import pandas as pd
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import seaborn as sns
# Load the iris data set once and reuse the returned bunch; calling
# load_iris() for every field re-reads the data set each time.
iris = load_iris()
# Input features as a data frame, one column per named feature.
data = pd.DataFrame(iris["data"], columns=iris['feature_names'])
# Class labels taken from the data set's target array.
data["species"] = iris["target"]
data.head()
@ksv-muralidhar
ksv-muralidhar / gauss_transform_1.py
Created February 16, 2021 04:36
Gaussian Transforms
# ColumnTransformer lives in sklearn.compose, not sklearn.preprocessing;
# importing it from sklearn.preprocessing raises ImportError.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer

# Element-wise natural log; np.log is passed directly (no lambda wrapper needed).
log_transform = FunctionTransformer(np.log)
# Apply the log transform to every column of X (selected by position).
# remainder='passthrough' is kept from the original, although no columns remain.
ct = ColumnTransformer(
    transformers=[('log_transform', log_transform, list(range(len(X.columns))))],
    remainder='passthrough',
)
# All columns are transformed in their original order, so X's own column
# labels can be reused instead of a hard-coded [0, 1, 2, 3].
log_X = pd.DataFrame(ct.fit_transform(X), columns=X.columns)
@ksv-muralidhar
ksv-muralidhar / gauss_test_1.py
Created February 16, 2021 04:57
Gaussian Tests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Wrap the iris feature array in a data frame (default integer column labels);
# the copy keeps X independent of the array returned by the loader.
iris_bunch = load_iris()
X = pd.DataFrame(iris_bunch["data"]).copy()
@ksv-muralidhar
ksv-muralidhar / learning_curve_1.py
Created February 16, 2021 05:52
Learning Curve
from sklearn.model_selection import learning_curve
@ksv-muralidhar
ksv-muralidhar / api_1.py
Created February 16, 2021 07:11
API data extraction
import requests
import pandas as pd
from IPython.display import display
@ksv-muralidhar
ksv-muralidhar / hist_1.py
Last active February 16, 2021 12:45
histogram bins
# Show how the bin count changes the look of a histogram: the same 1000
# standard-normal draws are plotted six times on a 2x3 grid of axes.
fig, ax = plt.subplots(2, 3, figsize=(15, 10))
np.random.seed(11)  # fixed seed so the figure is reproducible
norm_dist = np.random.randn(1000)
for n, i in enumerate(np.linspace(5, 100, 6)):
    # Panel n belongs in grid cell (n // 3, n % 3). Deriving both indices from
    # n ensures the column advances; the visible code never incremented `col`,
    # so every panel in a row was drawn onto column 0.
    row, col = divmod(n, 3)
    sns.histplot(x=norm_dist, bins=int(i), ax=ax[row, col])
    ax[row, col].set_title(f'bins = {int(i)}')
@ksv-muralidhar
ksv-muralidhar / pipe_1.py
Last active February 17, 2021 11:39
Pipeline
from sklearn.compose import ColumnTransformer