Skip to content

Instantly share code, notes, and snippets.

View egemenzeytinci's full-sized avatar
💻
Work hard play hard

Egemen Zeytinci egemenzeytinci

💻
Work hard play hard
  • Istanbul, Turkey
View GitHub Profile
# Score each RFM dimension from 1 to 3 by splitting it into three
# quantile bins. Recency labels run 3 -> 1, so the lowest recency
# bin receives the highest score; frequency and monetary labels
# run 1 -> 3, so larger values receive higher scores.
df['r_val'] = pd.qcut(df['recency'], q=3, labels=range(3, 0, -1))
df['f_val'] = pd.qcut(df['frequency'], q=3, labels=range(1, 4))
df['m_val'] = pd.qcut(df['monetary'], q=3, labels=range(1, 4))

# Build the segment code by concatenating the three scores as strings
# (e.g. r=3, f=1, m=2 -> '312').
df['rfm_val'] = (
    df['r_val'].astype(str) +
    df['f_val'].astype(str) +
    df['m_val'].astype(str)
)
from py2neo import Graph
import pandas as pd
# Connection settings for the py2neo Graph created below; `host` is passed
# to it directly, and the remaining values are presumably passed as well.
# NOTE(review): 7687 looks like the Neo4j Bolt port — confirm for your setup.
host = 'localhost'
port = 7687
# Credentials are left as empty strings in this snippet.
user = ''
password = ''
graph = Graph(
host=host,
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
class Report:
    """Hold a test split and print evaluation metrics for fitted models."""

    def __init__(self, X_test, y_test):
        """Store the held-out features and labels.

        Args:
            X_test: Feature data later passed to ``model.predict``.
            y_test: True labels compared against the predictions.
        """
        self.X = X_test
        self.y = y_test

    def metrics(self, model):
        """Print the accuracy of ``model`` on the stored test data.

        Args:
            model: Object exposing a ``predict(X)`` method.

        Note:
            ``accuracy_score`` must already be in scope at module level
            (presumably ``sklearn.metrics.accuracy_score`` — confirm).
        """
        y_pred = model.predict(self.X)

        print('Accuracy score:\n')
        print(accuracy_score(self.y, y_pred))
def compare():
for is_le in [True, False]:
method = 'label encoder'
if is_le:
selected = df_le[selects_le + ['is_canceled']]
else:
selected = df_hot[selects_hot + ['is_canceled']]
method = 'dummy variables'
def select(X):
selects = []
selector = SelectKBest(chi2, k='all').fit(X, y)
scores = selector.scores_
q3 = np.quantile(scores, 0.75)
q1 = np.quantile(scores, 0.25)
iqr = q3 - q1
threshold = q3 + 1.5 * iqr
@egemenzeytinci
egemenzeytinci / iqr.py
Created December 25, 2019 15:11
Outlier detection with IQR
cleaned = df.copy()
columns = [
'lead_time',
'stays_in_weekend_nights',
'stays_in_week_nights',
'adults',
'children',
'babies',
'adr',
@egemenzeytinci
egemenzeytinci / tweet_dumper.py
Created December 20, 2019 11:08 — forked from yanofsky/LICENSE
A script to download all of a user's tweets into a CSV
#!/usr/bin/env python
# encoding: utf-8
import tweepy #https://github.com/tweepy/tweepy
import csv
#Twitter API credentials
consumer_key = ""
consumer_secret = ""
access_key = ""
@egemenzeytinci
egemenzeytinci / feature_importance.py
Created December 15, 2019 09:58
Feature importances in Python
from rfpimp import permutation_importances
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pandas as pd
def imp_df(column_names, importances):
data = {
@egemenzeytinci
egemenzeytinci / preprocessing.py
Last active November 26, 2019 21:28
Preprocessing steps in Python
from nltk.corpus import stopwords
from stemming.porter2 import stem
import nltk
import re
import string
# Download the NLTK resources this script relies on: the 'punkt' package
# and the 'stopwords' corpus.
nltk.download('punkt')
nltk.download('stopwords')
# English stopword list loaded from the NLTK stopwords corpus.
default_stopwords = stopwords.words('english')