Parul Pandey parulnith

## Line chart.py
# Line chart: median salary plotted against years of experience.
experience_years = list(salary_exp['Total years of experience'])
median_salaries = list(salary_exp['Salary'])

chart = ctc.Line("Median compensation by years of experience")
chart.set_options(
    labels=experience_years,
    x_label="Experience in Years",
    y_label="Salary in EUR",
    colors=['#EA5F89'],
)
chart.add_series("Salary", median_salaries)

# load_javascript() is called the first time a chart is rendered.
chart.load_javascript()

## Scatter.py
# Scatter chart: median salary against years of experience.
chart = ctc.Scatter("Median compensation by years of experience")
chart.set_options(
    x_label="Experience in Years",
    # The 'Salary' column is renamed from the "... in EUR" survey column,
    # so the axis is labelled in EUR (matching the line chart of the same data).
    y_label="Salary in EUR",
    x_tick_count=4,
    y_tick_count=3,
    dot_size=1,
    colors=['#47B39C'],
)

# zip() already yields (experience, salary) tuples, so no per-item rebuild
# of each tuple is needed.
chart.add_series(
    "Salary",
    list(zip(salary_exp['Total years of experience'], salary_exp['Salary'])),
)

## radar chart.py
# Give the bonus/stocks column a shorter name.
se.rename(
    columns={'Yearly bonus + stocks in EUR': 'Salary with Stocks'},
    inplace=True,
)

# Build salary_exp2: the median 'Salary with Stocks' for each value of
# 'Total years of experience', with both columns cast to int.
stock_columns = ['Total years of experience', 'Salary with Stocks']
se['Salary with Stocks'] = se['Salary with Stocks'].astype(int)
median_with_stocks = se.groupby(['Total years of experience'])['Salary with Stocks'].median()
salary_exp2 = median_with_stocks.to_frame().reset_index()
salary_exp2[stock_columns] = salary_exp2[stock_columns].astype(int)
salary_exp2.sort_values('Total years of experience', inplace=True)

# Drawing a Radar Chart

## Pie Chart.py
# Pie chart of respondents by gender: one slice per index entry of `gender`.
pie_options = {
    'labels': list(gender.index),
    'inner_radius': 0,
    'colors': ['#FFF1C1', '#F7B7A3', '#EA5F89'],
}

chart = ctc.Pie("Gender of Respondents")
chart.set_options(**pie_options)
chart.add_series(list(gender['values']))

# Calling the load_javascript function when rendering chart first time.

## preprocessing_for_line chart.py
# Keep only the Software Engineer rows (note the trailing space in the
# 'Position ' column name). Take an explicit copy: `se` is renamed and
# assigned to in place afterwards, and doing that on a filtered slice of
# `df` is the chained-assignment pattern pandas warns about.
se = df[df['Position '] == 'Software Engineer'].copy()
se.rename(
    columns={'Yearly brutto salary (without bonus and stocks) in EUR': 'Salary'},
    inplace=True,
)

# Median salary for each value of 'Total years of experience', as integer
# columns ordered by experience.
salary_exp = se.groupby(['Total years of experience'])['Salary'].median().to_frame().reset_index()
salary_exp[['Total years of experience', 'Salary']] = salary_exp[['Total years of experience', 'Salary']].astype(int)
salary_exp.sort_values('Total years of experience', inplace=True)

# Preview the first five rows (notebook display).
salary_exp[:5]

## employee_dataset.py
# Small employee dataset: 12 rows with gender, age, education field and
# monthly income.
df = pd.DataFrame(dict(
    Gender=[
        'Female', 'Male', 'Male', 'Male', 'Male', 'Female',
        'Male', 'Male', 'Male', 'Female', 'Male', 'Female',
    ],
    Age=[41, 49, 37, 33, 27, 32, 59, 30, 38, 36, 35, 29],
    EducationField=[
        'Life Sciences', 'Engineering', 'Life Sciences', 'Life Sciences',
        'Medical', 'Life Sciences', 'Life Sciences', 'Life Sciences',
        'Engineering', 'Medical', 'Life Sciences', 'Life Sciences',
    ],
    MonthlyIncome=[
        5993, 5130, 2090, 2909, 3468, 3068,
        2670, 2693, 9526, 5237, 2426, 4193,
    ],
))

## species.py
# One sub-frame per species value.
df_Adelie = df.loc[df['species'].eq('Adelie')]
df_Gentoo = df.loc[df['species'].eq('Gentoo')]
df_Chinstrap = df.loc[df['species'].eq('Chinstrap')]

# Pair each species frame with a colour name, in the same order.
datasets = [df_Adelie, df_Gentoo, df_Chinstrap]
color = ['skyblue', 'red', 'orange']
zip_datasets_color = zip(datasets, color)
for d,c in zip_datasets_color:
    g = sns.lmplot(x = 'culmen_length_mm',
                   y = 'culmen_depth_mm',

## entire_population.py
# Regression plot of culmen depth against culmen length over the whole frame.
sns.lmplot(data=df, x='culmen_length_mm', y='culmen_depth_mm')

# Pearson correlation coefficient (first element of the pearsonr result),
# written onto the current axes in axes-relative coordinates.
r, _p_value = stats.pearsonr(df['culmen_length_mm'], df['culmen_depth_mm'])
ax = plt.gca()
annotation = 'r={:.3f}'.format(r)
ax.text(.03, 1, annotation, transform=ax.transAxes)

# Show the figure.
plt.show()

## celluloid_subplots.py
import numpy as np
from matplotlib import pyplot as plt
from celluloid import Camera

# Two stacked subplots animated with celluloid: a sine wave shifted by +i on
# the top axes and by -i on the bottom axes, one frame per phase value i.
fig, axes = plt.subplots(2)
camera = Camera(fig)
t = np.linspace(0, 2 * np.pi, 128, endpoint=False)
for i in t:
    axes[0].plot(t, np.sin(t + i), color='blue')
    axes[1].plot(t, np.sin(t - i), color='blue')
    # Capture the figure after each pair of curves; without a snapshot the
    # Camera records nothing and every curve is overdrawn on the same axes.
    camera.snap()

# Assemble the captured frames into a matplotlib animation.
animation = camera.animate()

## evaluate.py
%matplotlib inline
from sklearn.metrics import roc_curve, precision_recall_curve, auc
import matplotlib.pyplot as plt
import numpy as np


def get_auc(labels, scores):
    fpr, tpr, thresholds = roc_curve(labels, scores)
    auc_score = auc(fpr, tpr)
	chart = ctc.Line("Median compensation by years of experience")
	chart.set_options(
	labels=list(salary_exp['Total years of experience']),
	x_label="Experience in Years",
	y_label="Salary in EUR",
	colors=['#EA5F89'])
	chart.add_series("Salary", list(salary_exp['Salary']))

	# Calling the load_javascript function when rendering chart first time.
	chart.load_javascript()
	chart = ctc.Scatter("Median compensation by years of experience")
	chart.set_options(
	x_label="Experience in Years",
	y_label="Salary in USD",
	x_tick_count=4,
	y_tick_count=3,
	dot_size=1,
	colors=['#47B39C'])

	chart.add_series("Salary",[(z[0], z[1]) for z in list(zip(salary_exp['Total years of experience'],salary_exp['Salary']))])
	# Renaming the column
	se.rename(columns = {'Yearly bonus + stocks in EUR': 'Salary with Stocks'}, inplace=True)

	# Creating a dataframe salary_exp2 containing salary with stocks and bonuses
	se['Salary with Stocks'] = se['Salary with Stocks'].astype(int)
	salary_exp2 = se.groupby(['Total years of experience'])['Salary with Stocks'].median().to_frame().reset_index()
	salary_exp2[['Total years of experience','Salary with Stocks']] = salary_exp2[['Total years of experience','Salary with Stocks']].astype(int)
	salary_exp2.sort_values('Total years of experience',inplace=True)

	# Drawing a Radar Chart
	chart = ctc.Pie("Gender of Respondents")

	chart.set_options(
	labels=list(gender.index),
	inner_radius=0,
	colors=['#FFF1C1','#F7B7A3','#EA5F89'],
	)
	chart.add_series(list(gender['values']))

	# Calling the load_javascript function when rendering chart first time.
	# Filtering salary and experience details of only Software Engineers
	se = df[df['Position '] == 'Software Engineer']
	se.rename(columns = {'Yearly brutto salary (without bonus and stocks) in EUR': 'Salary'}, inplace=True)

	salary_exp = se.groupby(['Total years of experience'])['Salary'].median().to_frame().reset_index()
	salary_exp[['Total years of experience','Salary']] = salary_exp[['Total years of experience','Salary']].astype(int)
	salary_exp.sort_values('Total years of experience',inplace=True)
	salary_exp[:5]
	df = pd.DataFrame({
	'Gender' : ['Female', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male','Male', 'Female','Male', 'Female'],
	'Age' : [41, 49, 37, 33, 27, 32, 59, 30, 38, 36, 35, 29],
	'EducationField': ['Life Sciences', 'Engineering', 'Life Sciences', 'Life Sciences', 'Medical', 'Life Sciences', 'Life Sciences', 'Life Sciences', 'Engineering', 'Medical', 'Life Sciences', 'Life Sciences'],
	'MonthlyIncome': [5993, 5130, 2090, 2909, 3468, 3068, 2670, 2693, 9526, 5237, 2426, 4193]
	})
	df_Adelie = df[df['species'] == 'Adelie']
	df_Gentoo = df[df['species'] == 'Gentoo']
	df_Chinstrap = df[df['species'] == 'Chinstrap']

	datasets = [df_Adelie,df_Gentoo,df_Chinstrap]
	color = ['skyblue','red','orange']
	zip_datasets_color = zip(datasets, color)
	for d,c in zip_datasets_color:
	g = sns.lmplot(x = 'culmen_length_mm',
	y = 'culmen_depth_mm',
	sns.lmplot(x = 'culmen_length_mm',y = 'culmen_depth_mm', data = df);

	# For calculating correlation coefficient and superimposing on the plot
	r = stats.pearsonr(df['culmen_length_mm'], df['culmen_depth_mm'])[0]
	ax = plt.gca()
	ax.text(.03, 1, 'r={:.3f}'.format(r),
	transform=ax.transAxes)

	#Displaying the plot
	plt.show()
	import numpy as np
	from matplotlib import pyplot as plt
	from celluloid import Camera

	fig, axes = plt.subplots(2)
	camera = Camera(fig)
	t = np.linspace(0, 2 * np.pi, 128, endpoint=False)
	for i in t:
	axes[0].plot(t, np.sin(t + i), color='blue')
	axes[1].plot(t, np.sin(t - i), color='blue')
	%matplotlib inline
	from sklearn.metrics import roc_curve, precision_recall_curve, auc
	import matplotlib.pyplot as plt
	import numpy as np



	def get_auc(labels, scores):
	fpr, tpr, thresholds = roc_curve(labels, scores)
	auc_score = auc(fpr, tpr)