Skip to content

Instantly share code, notes, and snippets.

View KhyatiMahendru's full-sized avatar
👀
Look out, working on exciting things :)

Khyati Mahendru KhyatiMahendru

👀
Look out, working on exciting things :)
View GitHub Profile
@KhyatiMahendru
KhyatiMahendru / creditcardfrauddetection.ipynb
Last active February 21, 2022 16:37
CreditCardFraudDetection.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Load the mtcars dataset from a CSV file in the current working directory.
import pandas as pd
data = pd.read_csv('mtcars.csv')
# Discard the string and categorical columns listed below; every other
# column of the frame is kept unchanged.
cat_var = ['model', 'cyl', 'vs', 'am', 'gear', 'carb']
data = data.drop(columns=cat_var)
# scale the variables to prevent coefficients from becoming too large or too small
from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm
# Ordinary least squares fit of y on a single predictor: column index 4 of X.
# X and y are not defined in this snippet and must exist before it runs.
# NOTE(review): no constant column is added to the predictor here, so the
# fit presumably has no intercept term -- confirm that this is intended.
model = sm.OLS(endog=y, exog=X[:, 4]).fit()
# Build the summary table of the fitted regression results.
model.summary()
from sklearn.datasets import make_regression
# Synthetic regression data: 20 samples with 6 features each, noise level
# 0.5, and a fixed random_state so the same data is produced on every run.
X, y = make_regression(
    n_samples=20,
    n_features=6,
    noise=0.5,
    random_state=2,
)
from sklearn.tree import DecisionTreeClassifier
# Decision tree classifier using the entropy split criterion; random_state
# is fixed at 33.
clf_entropy = DecisionTreeClassifier(
    criterion='entropy',
    random_state=33,
)
# X (features) and Y (targets) are not defined in this snippet and must
# exist before it runs.
clf_entropy.fit(X, Y)
from sklearn.tree import DecisionTreeClassifier
# Decision tree classifier using the gini split criterion; random_state
# is fixed at 33.
clf_gini = DecisionTreeClassifier(
    criterion='gini',
    random_state=33,
)
# X (features) and Y (targets) are not defined in this snippet and must
# exist before it runs.
clf_gini.fit(X, Y)
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Silhouette score for every candidate cluster count from 2 to kmax.
# sil[i] holds the score obtained with k = i + 2 clusters.
# x is the data to cluster; it is not defined in this snippet and must
# exist before it runs.
sil = []
kmax = 10

# dissimilarity would not be defined for a single cluster, thus, minimum number of clusters should be 2
for k in range(2, kmax + 1):
    # Fit k-means with k clusters and score the resulting assignment.
    kmeans = KMeans(n_clusters=k).fit(x)
    labels = kmeans.labels_
    sil.append(silhouette_score(x, labels, metric='euclidean'))
from sklearn.cluster import KMeans
# function returns WSS score for k values from 1 to kmax
def calculate_WSS(points, kmax):
sse = []
for k in range(1, kmax+1):
kmeans = KMeans(n_clusters = k).fit(points)
centroids = kmeans.cluster_centers_
pred_clusters = kmeans.predict(points)
curr_sse = 0
from sklearn.datasets import make_blobs
# Create dataset with 3 random cluster centers and 1000 two-feature
# datapoints; the samples are shuffled and random_state is fixed at 31.
x, y = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=3,
    shuffle=True,
    random_state=31,
)
# importing requirements
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import adam
# alpha = 0.001, as given in the lr parameter of the adam() optimizer
# (the optimizer call itself is not part of this excerpt).
# Build the model: start from an empty Sequential container, then append
# a 50-unit ReLU Dense layer that takes 2 input dimensions.
model_alpha1 = Sequential()
model_alpha1.add(
    Dense(units=50, input_dim=2, activation='relu')
)