Parul Pandey parulnith

## hyperparameters.py
# Build the XGBoost estimator from the keyword arguments held in `params_best`,
# adding the per-feature monotonicity constraints on top of them.
xgb_best = H2OXGBoostEstimator(
    monotone_constraints=mono_constraints,
    **params_best,
)

# Fit on the training frame; the validation frame is passed alongside it.
xgb_best.train(
    x=features,
    y=target,
    training_frame=training_frame,
    validation_frame=validation_frame,
)


## constraint.py
# Spearman rank correlation of every feature with the target. `target` is
# appended last, so its self-correlation is the final row and `.iloc[:-1]`
# drops it.
corr = pd.DataFrame(
    train[features + [target]].corr(method='spearman')[target]
).iloc[:-1]
corr.columns = ['Spearman Correlation Coefficient']

# The sign of each coefficient (-1, 0 or +1) becomes that feature's constraint.
# The single column is taken as a 1-D Series first: iterating the 2-D
# `corr.values` yields one-element arrays, and calling int() on those is
# deprecated since NumPy 1.25.
values = [int(i) for i in np.sign(corr['Spearman Correlation Coefficient'])]
mono_constraints = dict(zip(corr.index, values))
mono_constraints

## xgb_grid.py
# Candidate values for each hyperparameter.
# NOTE(review): presumably consumed by a search loop that is not part of this
# snippet -- confirm against the caller.
max_depths = [3, 6, 12, 15, 18]
etas = [0.01, 0.001]
reg_alphas = [0.01, 0.001]
col_sample_rates = [0.1, 0.5, 0.9]
subsamples = [0.1, 0.5, 0.9]

# Search bookkeeping: all counters start at zero and no model is selected yet.
iter_ = best_error = best_iter = 0
best_model = None

## description.py
# Map each integer class id (0-5) in the `label` column to its emotion name;
# enumerate() assigns the ids in list order.
labels_dict = dict(
    enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'])
)

# Add the readable name as a new `description` column and preview the frame.
train['description'] = train['label'].map(labels_dict)
train.head()

## import libraries.py
import numpy as np
import pandas as pd
import string

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Six custom hex colours that become the active seaborn palette.
colors = [
    "#01BEFE",
    "#FFDD00",
    "#FF7D00",
    "#FF006D",
    "#ADFF02",
    "#8F00FF",
]

# Apply the base theme first, then replace its 'muted' palette with the
# custom colours defined above.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
sns.set_palette(sns.color_palette(colors))

## Loading data.py
# IPython/Jupyter shell escape (not valid in a plain .py file): runs wget to
# fetch merged_training.pkl, the file that is loaded further down.
!wget https://www.dropbox.com/s/607ptdakxuh5i4s/merged_training.pkl

# Defining a helper function to load the data
import pickle
def load_from_pickle(directory):
    """Load and return the object stored in a pickle file.

    Args:
        directory: Path of the pickle file to read. Despite the name, it is
            passed straight to ``open()``, so it must be a file path.

    Returns:
        The unpickled Python object.

    Note:
        Unpickling can execute arbitrary code; only load files you trust.
    """
    # The context manager closes the handle even if unpickling fails.
    with open(directory, "rb") as pickle_file:
        return pickle.load(pickle_file)

# Read the pickled dataset from the working directory into `data`
data = load_from_pickle("merged_training.pkl")

## radar chart.py
# Give the bonus/stock column a shorter name. `se` is rebound to the renamed
# copy instead of being renamed in place: `se` is a filtered selection of
# another frame, and mutating such a selection in place is what triggers
# pandas' SettingWithCopyWarning.
se = se.rename(columns={'Yearly bonus + stocks in EUR': 'Salary with Stocks'})

# Creating a dataframe salary_exp2 containing the median salary with stocks
# and bonuses for each value of 'Total years of experience'
se['Salary with Stocks'] = se['Salary with Stocks'].astype(int)
salary_exp2 = (
    se.groupby(['Total years of experience'])['Salary with Stocks']
    .median()
    .to_frame()
    .reset_index()
)

# Both columns are cast to integers, then rows are ordered by experience.
salary_exp2[['Total years of experience', 'Salary with Stocks']] = (
    salary_exp2[['Total years of experience', 'Salary with Stocks']].astype(int)
)
salary_exp2.sort_values('Total years of experience', inplace=True)

# Drawing a Radar Chart

## Scatter.py
# Scatter chart of median salary against years of experience.
chart = ctc.Scatter("Median compensation by years of experience")
chart.set_options(
    x_label="Experience in Years",
    # The plotted 'Salary' column holds EUR amounts (it is renamed from a
    # '... in EUR' column), so the axis is labelled in EUR rather than USD.
    y_label="Salary in EUR",
    x_tick_count=4,
    y_tick_count=3,
    dot_size=1,
    colors=['#47B39C'],
)

# zip() already yields (experience, salary) tuples, so no per-item repacking
# is needed.
chart.add_series(
    "Salary",
    list(zip(salary_exp['Total years of experience'], salary_exp['Salary'])),
)

## Line chart.py
# Line chart of median salary per year of experience: the experience values
# are the x-axis labels and the salaries are the single plotted series.
experience_years = list(salary_exp['Total years of experience'])
median_salaries = list(salary_exp['Salary'])

chart = ctc.Line("Median compensation by years of experience")
chart.set_options(
    colors=['#EA5F89'],
    x_label="Experience in Years",
    y_label="Salary in EUR",
    labels=experience_years,
)
chart.add_series("Salary", median_salaries)

# load_javascript() has to be called the first time a chart is rendered.
chart.load_javascript()

## preprocessing_for_line chart.py
# Filtering salary and experience details of only Software Engineers.
# The column name 'Position ' really does end with a space. rename() is
# chained without inplace=True so that `se` is a new frame; renaming the
# filtered selection in place triggers pandas' SettingWithCopyWarning.
se = df[df['Position '] == 'Software Engineer'].rename(
    columns={'Yearly brutto salary (without bonus and stocks) in EUR': 'Salary'}
)

# Median salary for each value of 'Total years of experience'
salary_exp = se.groupby(['Total years of experience'])['Salary'].median().to_frame().reset_index()
# Cast both columns to integers and order the rows by experience
salary_exp[['Total years of experience','Salary']] = salary_exp[['Total years of experience','Salary']].astype(int)
salary_exp.sort_values('Total years of experience',inplace=True)
# Preview the first five rows
salary_exp[:5]
	# Build the XGBoost estimator from the keyword arguments held in
	# `params_best`, adding the per-feature monotonicity constraints on top.
	xgb_best = H2OXGBoostEstimator(
	    monotone_constraints=mono_constraints,
	    **params_best,
	)

	# Fit on the training frame; the validation frame is passed alongside it.
	xgb_best.train(
	    x=features,
	    y=target,
	    training_frame=training_frame,
	    validation_frame=validation_frame,
	)
	# Spearman rank correlation of every feature with the target. `target` is
	# appended last, so its self-correlation is the final row and `.iloc[:-1]`
	# drops it.
	corr = pd.DataFrame(
	    train[features + [target]].corr(method='spearman')[target]
	).iloc[:-1]
	corr.columns = ['Spearman Correlation Coefficient']

	# The sign of each coefficient (-1, 0 or +1) becomes that feature's
	# constraint. The single column is taken as a 1-D Series first: iterating
	# the 2-D `corr.values` yields one-element arrays, and calling int() on
	# those is deprecated since NumPy 1.25.
	values = [int(i) for i in np.sign(corr['Spearman Correlation Coefficient'])]
	mono_constraints = dict(zip(corr.index, values))
	mono_constraints
	# Candidate values for each hyperparameter.
	# NOTE(review): presumably consumed by a search loop that is not part of
	# this snippet -- confirm against the caller.
	max_depths = [3, 6, 12, 15, 18]
	etas = [0.01, 0.001]
	reg_alphas = [0.01, 0.001]
	col_sample_rates = [0.1, 0.5, 0.9]
	subsamples = [0.1, 0.5, 0.9]

	# Search bookkeeping: all counters start at zero and no model is selected.
	iter_ = best_error = best_iter = 0
	best_model = None
	# Map each integer class id (0-5) in the `label` column to its emotion
	# name; enumerate() assigns the ids in list order.
	labels_dict = dict(
	    enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'])
	)

	# Add the readable name as a new `description` column and preview it.
	train['description'] = train['label'].map(labels_dict)
	train.head()
	import numpy as np
	import pandas as pd
	import string

	import matplotlib
	import matplotlib.pyplot as plt
	import seaborn as sns
	# Six custom hex colours that become the active seaborn palette.
	colors = [
	    "#01BEFE",
	    "#FFDD00",
	    "#FF7D00",
	    "#FF006D",
	    "#ADFF02",
	    "#8F00FF",
	]

	# Apply the base theme first, then replace its 'muted' palette with the
	# custom colours defined above.
	sns.set(style='whitegrid', palette='muted', font_scale=1.2)
	sns.set_palette(sns.color_palette(colors))
	# IPython/Jupyter shell escape (not valid in a plain .py file): runs wget
	# to fetch merged_training.pkl, the file that is loaded further down.
	!wget https://www.dropbox.com/s/607ptdakxuh5i4s/merged_training.pkl

	# Defining a helper function to load the data
	import pickle
	def load_from_pickle(directory):
		"""Load and return the object stored in a pickle file.

		Args:
			directory: Path of the pickle file to read. Despite the name, it
				is passed straight to ``open()``, so it must be a file path.

		Returns:
			The unpickled Python object.

		Note:
			Unpickling can execute arbitrary code; only load files you trust.
		"""
		# The context manager closes the handle even if unpickling fails.
		with open(directory, "rb") as pickle_file:
			return pickle.load(pickle_file)

	# Read the pickled dataset from the working directory into `data`
	data = load_from_pickle("merged_training.pkl")
	# Give the bonus/stock column a shorter name. `se` is rebound to the
	# renamed copy instead of being renamed in place: `se` is a filtered
	# selection of another frame, and mutating such a selection in place is
	# what triggers pandas' SettingWithCopyWarning.
	se = se.rename(columns={'Yearly bonus + stocks in EUR': 'Salary with Stocks'})

	# Creating a dataframe salary_exp2 containing the median salary with
	# stocks and bonuses for each value of 'Total years of experience'
	se['Salary with Stocks'] = se['Salary with Stocks'].astype(int)
	salary_exp2 = (
	    se.groupby(['Total years of experience'])['Salary with Stocks']
	    .median()
	    .to_frame()
	    .reset_index()
	)

	# Both columns are cast to integers, then rows are ordered by experience.
	salary_exp2[['Total years of experience', 'Salary with Stocks']] = (
	    salary_exp2[['Total years of experience', 'Salary with Stocks']].astype(int)
	)
	salary_exp2.sort_values('Total years of experience', inplace=True)

	# Drawing a Radar Chart
	# Scatter chart of median salary against years of experience.
	chart = ctc.Scatter("Median compensation by years of experience")
	chart.set_options(
	    x_label="Experience in Years",
	    # The plotted 'Salary' column holds EUR amounts (it is renamed from a
	    # '... in EUR' column), so the axis is labelled in EUR rather than USD.
	    y_label="Salary in EUR",
	    x_tick_count=4,
	    y_tick_count=3,
	    dot_size=1,
	    colors=['#47B39C'],
	)

	# zip() already yields (experience, salary) tuples, so no per-item
	# repacking is needed.
	chart.add_series(
	    "Salary",
	    list(zip(salary_exp['Total years of experience'], salary_exp['Salary'])),
	)
	# Line chart of median salary per year of experience: the experience
	# values are the x-axis labels and the salaries are the plotted series.
	experience_years = list(salary_exp['Total years of experience'])
	median_salaries = list(salary_exp['Salary'])

	chart = ctc.Line("Median compensation by years of experience")
	chart.set_options(
	    colors=['#EA5F89'],
	    x_label="Experience in Years",
	    y_label="Salary in EUR",
	    labels=experience_years,
	)
	chart.add_series("Salary", median_salaries)

	# load_javascript() has to be called the first time a chart is rendered.
	chart.load_javascript()
	# Filtering salary and experience details of only Software Engineers.
	# The column name 'Position ' really does end with a space. rename() is
	# chained without inplace=True so that `se` is a new frame; renaming the
	# filtered selection in place triggers pandas' SettingWithCopyWarning.
	se = df[df['Position '] == 'Software Engineer'].rename(
	    columns={'Yearly brutto salary (without bonus and stocks) in EUR': 'Salary'}
	)

	# Median salary for each value of 'Total years of experience'
	salary_exp = se.groupby(['Total years of experience'])['Salary'].median().to_frame().reset_index()
	# Cast both columns to integers and order the rows by experience
	salary_exp[['Total years of experience','Salary']] = salary_exp[['Total years of experience','Salary']].astype(int)
	salary_exp.sort_values('Total years of experience',inplace=True)
	# Preview the first five rows
	salary_exp[:5]