Skip to content

Instantly share code, notes, and snippets.

# Build the XGBoost estimator from the keyword arguments in params_best plus
# the monotonicity constraints held in mono_constraints.
xgb_best = H2OXGBoostEstimator(
    monotone_constraints=mono_constraints,
    **params_best,
)
# Fit on training_frame; validation_frame is supplied alongside it.
xgb_best.train(
    x=features,
    y=target,
    training_frame=training_frame,
    validation_frame=validation_frame,
)
@parulnith
parulnith / constraint.py
Created March 5, 2023 02:41
chapter11_page_359
# Spearman rank correlation of every feature with the target. The target is
# the last column of the selection, so its self-correlation is the last row
# and is dropped by iloc[:-1].
corr = pd.DataFrame(
    train[features + [target]].corr(method='spearman')[target]
).iloc[:-1]
corr.columns = ['Spearman Correlation Coefficient']

# Sign of each correlation (-1, 0 or 1), one entry per feature.
# corr.values is 2-D (n_features x 1); flatten it so int() receives scalars.
# Calling int() on 1-element arrays is deprecated since NumPy 1.25.
values = [int(i) for i in np.sign(corr.values.ravel())]

# Feature name -> constraint sign, in the same order as the rows of corr.
mono_constraints = dict(zip(corr.index, values))
mono_constraints
@parulnith
parulnith / xgb_grid.py
Created April 7, 2022 14:25 — forked from jphall663/xgb_grid.py
Manual XGBoost grid search (Python)
# Bookkeeping for the search: counters start at zero, no model selected yet.
iter_ = best_error = best_iter = 0
best_model = None

# Lists of candidate values, one list per name.
# NOTE(review): presumably these are the hyperparameter grids iterated by the
# search loop, which is not visible in this snippet - confirm.
max_depths = [3, 6, 12, 15, 18]
etas = [0.01, 0.001]
reg_alphas = [0.01, 0.001]
col_sample_rates = [0.1, 0.5, 0.9]
subsamples = [0.1, 0.5, 0.9]
# Integer class id (0-5) -> label name.
labels_dict = dict(
    enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'])
)
# Add a 'description' column holding the name mapped from each row's 'label'.
train['description'] = train['label'].map(labels_dict)
train.head()
import numpy as np
import pandas as pd
import string
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Base seaborn settings: white grid style, 'muted' palette, larger fonts.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

# Custom six-colour palette, installed after the base settings above.
colors = [
    "#01BEFE", "#FFDD00", "#FF7D00",
    "#FF006D", "#ADFF02", "#8F00FF",
]
sns.set_palette(sns.color_palette(colors))
# IPython/Jupyter shell escape (not plain Python): runs wget to download
# merged_training.pkl from the Dropbox URL below.
!wget https://www.dropbox.com/s/607ptdakxuh5i4s/merged_training.pkl
# Defining a helper function to load the data
import pickle
def load_from_pickle(directory):
    """Load and return the object stored in a pickle file.

    Args:
        directory: Path of the pickle file to open (despite the name, it is
            passed straight to ``open`` and so must be a file path).

    Returns:
        The object produced by ``pickle.load``.

    NOTE(review): unpickling can execute arbitrary code, so only load files
    that come from a trusted source.
    """
    # Context manager so the file handle is closed even if unpickling raises.
    with open(directory, "rb") as pickle_file:
        return pickle.load(pickle_file)
# Read the pickled dataset from merged_training.pkl into `data`.
data = load_from_pickle("merged_training.pkl")
# Renaming the column. rename() returns a new frame that is rebound to `se`,
# so the column assignment below does not write into a filtered slice of
# another frame (doing that in place can raise SettingWithCopyWarning).
se = se.rename(columns={'Yearly bonus + stocks in EUR': 'Salary with Stocks'})
se['Salary with Stocks'] = se['Salary with Stocks'].astype(int)

# Creating a dataframe salary_exp2: the median of 'Salary with Stocks' for
# each value of 'Total years of experience'.
salary_exp2 = (
    se.groupby(['Total years of experience'])['Salary with Stocks']
    .median()
    .to_frame()
    .reset_index()
)
# Cast both columns to int, then order the rows by years of experience.
salary_exp2[['Total years of experience', 'Salary with Stocks']] = (
    salary_exp2[['Total years of experience', 'Salary with Stocks']].astype(int)
)
salary_exp2.sort_values('Total years of experience', inplace=True)
# Drawing a scatter chart of salary against years of experience
chart = ctc.Scatter("Median compensation by years of experience")
chart.set_options(
    x_label="Experience in Years",
    # The 'Salary' column is derived from a column expressed in EUR, and the
    # line chart of the same data is labelled EUR, so label this axis EUR too.
    y_label="Salary in EUR",
    x_tick_count=4,
    y_tick_count=3,
    dot_size=1,
    colors=['#47B39C'])
# One (years of experience, salary) tuple per row of salary_exp.
chart.add_series(
    "Salary",
    list(zip(salary_exp['Total years of experience'], salary_exp['Salary'])),
)
# Line chart of salary_exp: the experience values are passed as the labels
# and the 'Salary' column as the single plotted series.
chart = ctc.Line("Median compensation by years of experience")
chart.set_options(
    labels=list(salary_exp['Total years of experience']),
    x_label="Experience in Years",
    y_label="Salary in EUR",
    colors=['#EA5F89'],
)
chart.add_series("Salary", list(salary_exp['Salary']))

# load_javascript() is called when a chart is rendered for the first time.
chart.load_javascript()
# Filtering salary and experience details of only Software Engineers.
# Note that the 'Position ' column name includes a trailing space.
# The rename is chained onto the filter so that `se` is a new frame; renaming
# a boolean-filtered slice in place can raise SettingWithCopyWarning.
se = df[df['Position '] == 'Software Engineer'].rename(
    columns={'Yearly brutto salary (without bonus and stocks) in EUR': 'Salary'}
)

# Median 'Salary' for each value of 'Total years of experience'.
salary_exp = (
    se.groupby(['Total years of experience'])['Salary']
    .median()
    .to_frame()
    .reset_index()
)
# Cast both columns to int, then order the rows by years of experience.
salary_exp[['Total years of experience', 'Salary']] = (
    salary_exp[['Total years of experience', 'Salary']].astype(int)
)
salary_exp.sort_values('Total years of experience', inplace=True)
# Show the first five rows.
salary_exp[:5]