Skip to content

Instantly share code, notes, and snippets.

View vikramsoni2's full-sized avatar

Vikram vikramsoni2

  • Baxter International
  • Madrid
View GitHub Profile
@vikramsoni2
vikramsoni2 / xgb_bayes_opt_cv.py
Created November 3, 2017 11:25 — forked from thomasjungblut/xgb_bayes_opt_cv.py
XGBoost hyperparameter optimization using bayes_opt
from bayes_opt import BayesianOptimization
from sklearn.cross_validation import KFold
import xgboost as xgb
def xgbCv(train, features, numRounds, eta, gamma, maxDepth, minChildWeight, subsample, colSample):
# prepare xgb parameters
params = {
"objective": "reg:linear",
"booster" : "gbtree",
"eval_metric": "mae",
@vikramsoni2
vikramsoni2 / auc_and_precision_recall_plot.py
Created October 3, 2018 07:45
precision recall and auc plot
def plot_auc_pr(y_test, y_pred):
fig = plt.figure(figsize=(20,7))
plt.subplot(1,2,1)
fpr, tpr, _ = roc_curve(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)
plt.plot(fpr,tpr,label="auc="+str(auc))
plt.title('roc-auc curve')
plt.legend(loc=4)
@vikramsoni2
vikramsoni2 / lgbm_feature_ranking.py
Created October 3, 2018 09:46
LGBMClassifier feature ranking
imp = pd.DataFrame({'feats':predictors,
'ranks': clf.feature_importances_}).sort_values('ranks', ascending=False)
fig, ax = plt.subplots(figsize=(12, 15))
# Example data
features = np.arange(len(imp.feats))
ranking = imp.ranks
ax.barh(features, ranking, align='center', color='skyblue', ecolor='black')
@vikramsoni2
vikramsoni2 / lgb_bayes_opt.py
Created October 3, 2018 13:29
lightgbm classifier bayesian optimization
import pandas as pd;
import numpy as np;
import lightgbm as lgb
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score
def lgb_evaluate(
numLeaves,
maxDepth,
scaleWeight,
@vikramsoni2
vikramsoni2 / xgb_bayes_opt.py
Created October 3, 2018 13:30
XGBoost classification bayesian optimization
from bayes_opt import BayesianOptimization
from sklearn.cross_validation import KFold
import xgboost as xgb
import numpy
def xgbCv(train, features, numRounds, eta, gamma, maxDepth, minChildWeight, subsample, colSample):
# prepare xgb parameters
params = {
"objective": "binary:logistic",
"booster" : "gbtree",
@vikramsoni2
vikramsoni2 / correlation_matrix.py
Created October 5, 2018 10:45
pearson correlation matrix
# Columns of `df` that take part in the correlation heatmap.
features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
    'floors', 'waterfront', 'view', 'condition',
    'grade', 'sqft_above', 'sqft_basement', 'yr_built',
    'yr_renovated', 'zipcode', 'sqft_living15', 'sqft_lot15',
]

# Create the figure first so that the title and the heatmap both land on
# its (current) axes.
f, ax = plt.subplots(figsize=(16, 12))
plt.title('Pearson Correlation Matrix', fontsize=25)

# Annotated, square-celled heatmap of the pairwise correlations returned by
# df[features].corr() (presumably a pandas DataFrame, whose corr() defaults
# to Pearson - confirm against where `df` is loaded).
sns.heatmap(
    df[features].corr(),
    linewidths=0.25,
    vmax=1.0,
    square=True,
    cmap="BuGn_r",
    linecolor='k',
    annot=True,
)
@vikramsoni2
vikramsoni2 / imagenet1000_clsid_to_human.txt
Created January 5, 2019 15:47 — forked from yrevar/imagenet1000_clsidx_to_labels.txt
text: imagenet 1000 class id to human readable labels (Fox, E., & Guestrin, C. (n.d.). Coursera Machine Learning Specialization.)
{0: 'tench, Tinca tinca',
1: 'goldfish, Carassius auratus',
2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
3: 'tiger shark, Galeocerdo cuvieri',
4: 'hammerhead, hammerhead shark',
5: 'electric ray, crampfish, numbfish, torpedo',
6: 'stingray',
7: 'cock',
8: 'hen',
9: 'ostrich, Struthio camelus',
def add_datepart(df, fldname, drop=True):
    """Expand the date column ``fldname`` of ``df`` into calendar features, in place.

    The column is converted with ``pd.to_datetime`` when it is not already a
    datetime column (the converted values are written back to ``df``). One new
    column is then added per calendar attribute (Year, Month, Week, Day,
    Dayofweek, Dayofyear and the Is_month/quarter/year start/end flags), plus
    an ``Elapsed`` column holding whole seconds since 1970-01-01.

    New column names are ``fldname`` with a trailing ``date``/``Date``
    removed, followed by the attribute name (``saledate`` -> ``saleYear``).

    Args:
        df: DataFrame to modify in place.
        fldname: Name of the column holding the dates.
        drop: When true, remove the original ``fldname`` column afterwards.

    Returns:
        None; ``df`` is mutated.
    """
    fld = df[fldname]
    # is_datetime64_any_dtype also recognises tz-aware columns, on which
    # np.issubdtype raises TypeError because their dtype is not a numpy dtype.
    if not pd.api.types.is_datetime64_any_dtype(fld):
        # infer_datetime_format is intentionally not passed: it is deprecated
        # since pandas 2.0, where format inference is the default behaviour.
        df[fldname] = fld = pd.to_datetime(fld)
    targ_pre = re.sub('[Dd]ate$', '', fldname)
    accessor = fld.dt
    for n in ('Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
              'Is_month_end', 'Is_month_start', 'Is_quarter_end',
              'Is_quarter_start', 'Is_year_end', 'Is_year_start'):
        if n == 'Week' and hasattr(accessor, 'isocalendar'):
            # Series.dt.week was removed in pandas 2.0; isocalendar().week is
            # its replacement and yields the same ISO week numbers.
            values = accessor.isocalendar().week
            if not values.isna().any():
                # Keep the plain int64 dtype the old `.dt.week` produced; the
                # nullable dtype is only retained when there are missing dates.
                values = values.astype(np.int64)
        else:
            values = getattr(accessor, n.lower())
        df[targ_pre + n] = values
    # Compute the epoch offset by subtraction instead of reinterpreting the
    # raw integers, so the result does not depend on the column's storage
    # resolution (ns/us/ms/s). Missing dates give NaN here.
    epoch = pd.Timestamp('1970-01-01')
    if accessor.tz is not None:
        epoch = epoch.tz_localize('UTC')
    df[targ_pre + 'Elapsed'] = (fld - epoch) // pd.Timedelta(seconds=1)
    if drop:
        df.drop(fldname, axis=1, inplace=True)
import optuna
from collections import Counter
# optuna
# Unpack the seven values that build_predictors_naming_conventions returns
# for the list of df_train's column names. `target` is used further down as
# the default label column of get_pos_weight.
predictors, target, key_id, entity, timestamp, order_by_entity, group_within_entity = build_predictors_naming_conventions(list(df_train.columns.values))
def get_pos_weight(train, label=target):
    """Return a damped negative-to-positive class ratio for ``train[label]``.

    Counts the occurrences of the values 0 and 1 in ``train[label]``, divides
    the count of 0 by the count of 1, scales the ratio by 0.90 and rounds the
    result to two decimals.

    Raises:
        ZeroDivisionError: if the value 1 never occurs in ``train[label]``.
    """
    class_counts = Counter(train[label])
    negatives_per_positive = class_counts[0] / class_counts[1]
    return round(negatives_per_positive * 0.90, 2)
@vikramsoni2
vikramsoni2 / cosine_matching.py
Last active March 5, 2019 11:46
Fuzzy matching between two arrays of strings: for each item in array1, select the most similar item from array2.
### Fuzzy matching between two arrays.
### for each item in array1, selects the most similar item from array2
import re, math
from collections import Counter
WORD = re.compile(r'\w+')
def get_cosine(vec1, vec2):
intersection = set(vec1.keys()) & set(vec2.keys())