Maarten Grootendorst (MaartenGr)

import pandas as pd
from imblearn.over_sampling import SMOTE
# Different sampling strategies can be applied
X_resampled, y_resampled = SMOTE(sampling_strategy={"Fraud": 1000}).fit_resample(X, y)
# Convert the resampled array back to a DataFrame with the original column names
X_resampled = pd.DataFrame(X_resampled, columns=X.columns)
MaartenGr / smote.py
Created August 9, 2019 05:49
Implementation of SMOTE
import pandas as pd
from imblearn.over_sampling import SMOTE
# Import data and create X, y
df = pd.read_csv('creditcard_small.csv')
X = df.iloc[:,:-1]
y = df.iloc[:,-1].map({1:'Fraud', 0:'No Fraud'})
# Resample data
X_resampled, y_resampled = SMOTE(sampling_strategy={"Fraud":1000}).fit_resample(X, y)
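To verify the oversampling worked, one can count the classes in the resampled target (a small sketch; pd.Series is only needed because fit_resample may return a plain array):

# Count classes after resampling; "Fraud" should now hold 1000 samples
print(pd.Series(y_resampled).value_counts())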
import featuretools as ft
import pandas as pd
# Create Entity
turnover_df = pd.read_csv('turnover.csv')
es = ft.EntitySet(id = 'Turnover')
es.entity_from_dataframe(entity_id = 'hr', dataframe = turnover_df, index = 'index')
# Run deep feature synthesis with transformation primitives
# (the primitive list here is an example; the original call is truncated)
feature_matrix, feature_defs = ft.dfs(entityset = es, target_entity = 'hr',
                                      trans_primitives = ['add_numeric', 'multiply_numeric'])
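The returned definitions make it easy to see which features were synthesized (a minimal sketch):

# Inspect the generated feature definitions and preview the feature matrix
print(feature_defs)
print(feature_matrix.head())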
# explicitly require this experimental feature
from sklearn.experimental import enable_iterative_imputer  # noqa
# now you can import normally from sklearn.impute
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
# Load data
titanic = pd.read_csv("titanic.csv")
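The snippet ends right after loading the data; a minimal sketch of the intended imputation step, assuming the DataFrame has been reduced to numeric columns, might look like this:

# Iteratively model each feature with a random forest to fill in missing values
imp = IterativeImputer(estimator=RandomForestRegressor(), max_iter=10, random_state=0)
titanic_imputed = pd.DataFrame(imp.fit_transform(titanic), columns=titanic.columns)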
from sklearn.ensemble import IsolationForest
import pandas as pd
import seaborn as sns
# Predict and visualize outliers
credit_card = pd.read_csv('creditcard_small.csv').drop(columns="Class")
clf = IsolationForest(contamination=0.01)
outliers = clf.fit_predict(credit_card)
sns.scatterplot(x=credit_card.V4, y=credit_card.V2, hue=outliers, palette='Set1', legend=False)
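fit_predict labels inliers as 1 and outliers as -1, so the flagged rows can be pulled out directly (a minimal sketch):

# Keep only the rows flagged as outliers (label -1)
flagged = credit_card[outliers == -1]
print(f"{len(flagged)} potential outliers detected")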
import pickle
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.preprocessing import OneHotEncoder
# Load data and save the feature column names for use at prediction time
df = pd.read_csv("data.csv")
features = df.drop(columns='left').columns
with open('features.pickle', 'wb') as f:
    pickle.dump(features, f)
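The so-far-unused imports suggest the gist goes on to encode the categorical columns and train a LightGBM model that is pickled alongside the feature names; a minimal sketch under those assumptions (using pd.get_dummies in place of the imported OneHotEncoder for brevity):

# Encode categoricals, train the classifier, and persist it for serving
X = pd.get_dummies(df.drop(columns='left'))
y = df['left']
clf = LGBMClassifier().fit(X, y)
with open('model.pickle', 'wb') as f:
    pickle.dump(clf, f)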
# Data Handling
import pickle
import numpy as np
from pydantic import BaseModel
# Server
import uvicorn
from fastapi import FastAPI
# Modeling (the original import was cut off here; lightgbm matches the snippet above)
from lightgbm import LGBMClassifier
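The server file itself is not shown beyond its imports; a minimal sketch of what such a FastAPI prediction endpoint could look like, where the schema name, route, and host are all assumptions rather than the original gist:

app = FastAPI()

# Hypothetical request schema mirroring the payload sent below
class Employee(BaseModel):
    satisfaction_level: float
    last_evaluation: float
    number_project: int
    average_montly_hours: int
    time_spend_company: int
    Work_accident: int
    promotion_last_5years: int
    sales: str
    salary: str

@app.post("/predict")
def predict(employee: Employee):
    # A real endpoint would load the pickled model and re-apply the
    # training-time encoding before calling model.predict
    return {"received": employee.dict()}

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)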
import requests
to_predict_dict = {'satisfaction_level': 0.38,
'last_evaluation': 0.53,
'number_project': 2,
'average_montly_hours': 157,
'time_spend_company': 3,
'Work_accident': 0,
'promotion_last_5years': 0,
'sales': 'support',
'salary': 'low'}
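Sending the payload to a locally running server is then a single POST (URL and route match the assumptions in the sketch above):

# Send the payload as JSON and print the server's response
response = requests.post('http://127.0.0.1:8000/predict', json=to_predict_dict)
print(response.json())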
FROM tiangolo/uvicorn-gunicorn:python3.6-alpine3.8
# Make directories suited to your application
RUN mkdir -p /home/project/app
WORKDIR /home/project/app
# Copy and install requirements
COPY requirements.txt /home/project/app
RUN pip install --no-cache-dir -r requirements.txt
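# The original Dockerfile presumably continues by copying in the application
# code; the source path below is an assumption
COPY . /home/project/app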
import numpy as np
from sklearn.model_selection import train_test_split
X, y = np.arange(10).reshape((5, 2)), range(5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)