Skip to content

Instantly share code, notes, and snippets.

Avatar
👀
Look out, working on exciting things :)

Khyati Mahendru KhyatiMahendru

👀
Look out, working on exciting things :)
View GitHub Profile
@KhyatiMahendru
KhyatiMahendru / creditcardfrauddetection.ipynb
Last active May 11, 2020
CreditCardFraudDetection.ipynb
View creditcardfrauddetection.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View mtcars_regression.py
# Load the mtcars dataset from the working directory into a DataFrame.
import pandas as pd

data = pd.read_csv('mtcars.csv')

# Drop the string column and the columns treated as categorical so that
# only the remaining variables are kept.
cat_var = ['model', 'cyl', 'vs', 'am', 'gear', 'carb']
data = data.drop(columns=cat_var)
# scale the variables to prevent coefficients from becoming too large or too small
from sklearn.preprocessing import MinMaxScaler
View OLS.py
import statsmodels.api as sm

# Ordinary least squares fit of y on a single regressor: column index 4 of X.
# NOTE(review): sm.OLS does not add an intercept by itself; if a constant term
# is wanted, the regressors need sm.add_constant first -- confirm the intent.
model = sm.OLS(endog=y, exog=X[:, 4]).fit()

# Build the regression summary table (displayed when run as a notebook cell).
model.summary()
View generate_regression_data.py
from sklearn.datasets import make_regression

# Synthetic regression problem: 20 samples with 6 features each and a noise
# level of 0.5 on the target; random_state pins the generated data.
X, y = make_regression(
    n_samples=20,
    n_features=6,
    noise=0.5,
    random_state=2,
)
View decisiontree_entropy.py
from sklearn.tree import DecisionTreeClassifier

# Decision tree that scores candidate splits with the entropy criterion;
# the seed is fixed via random_state.
clf_entropy = DecisionTreeClassifier(
    random_state=33,
    criterion='entropy',
)

# Train on the feature matrix X and labels Y prepared earlier.
clf_entropy.fit(X, Y)
View decisiontree_gini.py
from sklearn.tree import DecisionTreeClassifier

# Decision tree that scores candidate splits with the Gini criterion;
# the seed is fixed via random_state.
clf_gini = DecisionTreeClassifier(
    random_state=33,
    criterion='gini',
)

# Train on the feature matrix X and labels Y prepared earlier.
clf_gini.fit(X, Y)
View silhouette_score.py
# KMeans is used below, so import it here rather than relying on another
# snippet having imported it first.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Silhouette score obtained for each candidate number of clusters,
# in order k = 2, 3, ..., kmax.
sil = []
kmax = 10

# dissimilarity would not be defined for a single cluster, thus, minimum
# number of clusters should be 2
for k in range(2, kmax + 1):
    # Fit k-means with k clusters on the data points in x.
    kmeans = KMeans(n_clusters=k).fit(x)
    labels = kmeans.labels_
    # Score the resulting cluster assignment using Euclidean distances.
    sil.append(silhouette_score(x, labels, metric='euclidean'))
View elbow_method.py
from sklearn.cluster import KMeans
# function returns WSS score for k values from 1 to kmax
def calculate_WSS(points, kmax):
sse = []
for k in range(1, kmax+1):
kmeans = KMeans(n_clusters = k).fit(points)
centroids = kmeans.cluster_centers_
pred_clusters = kmeans.predict(points)
curr_sse = 0
View generate_clustering_data.py
from sklearn.datasets import make_blobs

# Build a 2-feature dataset of 1000 shuffled points drawn around 3 randomly
# placed cluster centers; random_state pins the generated data.
x, y = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=3,
    shuffle=True,
    random_state=31,
)
View model_KL.py
# importing requirements
from keras.layers import Dense
from keras.models import Sequential
# NOTE(review): presumably the lowercase `adam` name is only exported by older
# Keras releases (newer ones expose `Adam`) -- confirm the targeted version.
from keras.optimizers import adam
# alpha = 0.001 as given in the lr parameter in adam() optimizer
# NOTE(review): no adam() call is visible in this excerpt; the optimizer is
# presumably configured further down -- verify against the full file.
# build the model
model_alpha1 = Sequential()
# first layer: fully connected, 50 units, ReLU activation, 2 input features
model_alpha1.add(Dense(50, input_dim=2, activation='relu'))