Ceren cereniyim

## Violinplot_scale.py
# violin plot of life expectancy per continent (scale="count"), followed by the title
sns.violinplot(
    data=df,
    x="continent",
    y="life_expectancy",
    order=["Africa", "Asia", "Americas", "Europe", "Oceania"],
    palette="Set3",
    scale="count",
    inner=None,
)
plt.title("Violinplot of Life Expectancy Among Continents Between 1952 and 2007")

## outlier_function.py
def outlier_function(df, col_name):
    """Compute conservative outlier limits for one column and count outliers.

    The limits are placed three interquartile ranges (IQR) below the first
    quartile and above the third quartile, so only extreme values fall
    outside them.

    Args:
        df: Object indexable by ``col_name`` whose result offers ``tolist()``
            (for example a pandas dataframe).
        col_name: Name of the numeric column to inspect.

    Returns:
        A tuple ``(lower_limit, upper_limit, outlier_count)`` where
        ``outlier_count`` is the number of values strictly below
        ``lower_limit`` or strictly above ``upper_limit``.
    """
    values = np.array(df[col_name].tolist())

    # one percentile call yields both quartiles
    first_quartile, third_quartile = np.percentile(values, [25, 75])
    iqr = third_quartile - first_quartile

    upper_limit = third_quartile + (3 * iqr)
    lower_limit = first_quartile - (3 * iqr)

    # values exactly on a limit are not counted as outliers
    outside = (values < lower_limit) | (values > upper_limit)
    outlier_count = int(outside.sum())

    return lower_limit, upper_limit, outlier_count

## distribution_plot.py
# figure size for the plot
figsize(14, 10)

# histogram of Cover_Type without a KDE curve, plus vertical marker lines
# at the column's mean (green) and median (yellow)
sns.distplot(trees.Cover_Type, kde=False)
plt.axvline(x=trees["Cover_Type"].mean(), color='g', linewidth=3, alpha=0.5, label="mean")
plt.axvline(x=trees["Cover_Type"].median(), color='y', linewidth=3, alpha=0.5, label="median")

# x-axis label
plt.xlabel("Cover_Type")

## kde_plot_target.py
# Cover_Type distribution, drawn separately for every wilderness area
figsize(14, 10)

# one KDE curve per area, labelled with the area name
for area in wilderness_areas:
    subset = trees.loc[trees['Wilderness_Area_Type'] == area]
    sns.kdeplot(subset.Cover_Type, linewidth=2, label=area)

# y-axis label
plt.ylabel("Density")

## reverse_one_hot_enocde.py
def split_numbers_chars(row):
    """Split a string into its leading text and its trailing run of digits.

    Returns a ``(head, tail)`` tuple: ``tail`` is the run of characters
    ``0``-``9`` at the very end of ``row`` (empty when there is none) and
    ``head`` is everything in front of it.
    """
    # the length left after stripping trailing digits is the split position
    cut = len(row.rstrip('0123456789'))
    return row[:cut], row[cut:]

def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name):
    ''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments
    1) transforms one-hot-encoded columns into one column consisting of column names with string data type

## correlation_coefficients.py
def corr_func(x, y, **kwargs):
    """Annotate the current axes with the correlation coefficient of x and y.

    The coefficient is the off-diagonal entry of ``np.corrcoef(x, y)`` and is
    written as ``r = <value>`` with two decimals. Extra keyword arguments are
    accepted but not used.
    """
    coefficient = np.corrcoef(x, y)[0, 1]
    label = "r = {:.2f}".format(coefficient)

    axes = plt.gca()
    # xy is interpreted in the axes' own transform (transAxes), not data units
    axes.annotate(label, xy=(.2, .8), xycoords=axes.transAxes, size=20)


## correlations_heatmap.py
# set the plot size by calling figsize(), as the other plots in this file do;
# assigning a tuple to the name would leave the size unset and shadow the helper
figsize(16, 14)

# plot the heatmap of the correlations with the value written in every cell
colormap = plt.cm.RdBu
sns.heatmap(correlations, linewidths=0.1,
            square=False, cmap=colormap, linecolor='white', annot=True)
plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)

## animated_bubble_chart.py
px.scatter(gapminder_df,
           x="Income",
           y="Life Expectancy",
           size="Population",
           size_max=60,
           color="Region",
           hover_name="Country",
           animation_frame="Year",
           animation_group="Country",
           color_discrete_sequence=["#FF69B4", "#87CEFA",

## factfulness_bubble_chart_2018.py
fig = px.scatter(gapminder_2018,
                 x="Income",
                 y="Life Expectancy",
                 size="Population",
                 size_max=60,
                 color="Region",
                 hover_name="Country",
                 facet_col="Income Level",
                 color_discrete_sequence=["#FF69B4", "#87CEFA",
                                          "#FFFF00", "#32CD32"],

## grid_search_visualization.py
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge

# load the data: feature matrix X and target vector y
# NOTE(review): load_boston is no longer shipped in scikit-learn >= 1.2 — confirm the pinned version
boston = load_boston()
X, y = boston.data, boston.target
	# violin plot of life expectancy per continent (scale="count"), followed by the title
	sns.violinplot(
		data=df,
		x="continent",
		y="life_expectancy",
		order=["Africa", "Asia", "Americas", "Europe", "Oceania"],
		palette="Set3",
		scale="count",
		inner=None,
	)
	plt.title("Violinplot of Life Expectancy Among Continents Between 1952 and 2007")
	def outlier_function(df, col_name):
		"""Compute conservative outlier limits for one column and count outliers.

		The limits are placed three interquartile ranges (IQR) below the first
		quartile and above the third quartile, so only extreme values fall
		outside them.

		Args:
			df: Object indexable by ``col_name`` whose result offers ``tolist()``
				(for example a pandas dataframe).
			col_name: Name of the numeric column to inspect.

		Returns:
			A tuple ``(lower_limit, upper_limit, outlier_count)`` where
			``outlier_count`` is the number of values strictly below
			``lower_limit`` or strictly above ``upper_limit``.
		"""
		values = np.array(df[col_name].tolist())

		# one percentile call yields both quartiles
		first_quartile, third_quartile = np.percentile(values, [25, 75])
		iqr = third_quartile - first_quartile

		upper_limit = third_quartile + (3 * iqr)
		lower_limit = first_quartile - (3 * iqr)

		# values exactly on a limit are not counted as outliers
		outside = (values < lower_limit) | (values > upper_limit)
		outlier_count = int(outside.sum())

		return lower_limit, upper_limit, outlier_count
	# figure size for the plot
	figsize(14, 10)

	# histogram of Cover_Type without a KDE curve, plus vertical marker lines
	# at the column's mean (green) and median (yellow)
	sns.distplot(trees.Cover_Type, kde=False)
	plt.axvline(x=trees["Cover_Type"].mean(), color='g', linewidth=3, alpha=0.5, label="mean")
	plt.axvline(x=trees["Cover_Type"].median(), color='y', linewidth=3, alpha=0.5, label="median")

	# x-axis label
	plt.xlabel("Cover_Type")
	# Cover_Type distribution, drawn separately for every wilderness area
	figsize(14, 10)

	# one KDE curve per area, labelled with the area name; the two statements
	# below must be nested under the loop header to run once per area
	for area in wilderness_areas:
		subset = trees[trees['Wilderness_Area_Type'] == area]
		sns.kdeplot(subset["Cover_Type"], label=area, linewidth=2)

	# y-axis label
	plt.ylabel("Density")
	def split_numbers_chars(row):
		"""Split a string into its leading text and its trailing run of digits.

		Returns a ``(head, tail)`` tuple: ``tail`` is the run of characters
		``0``-``9`` at the very end of ``row`` (empty when there is none) and
		``head`` is everything in front of it.
		"""
		# the length left after stripping trailing digits is the split position
		cut = len(row.rstrip('0123456789'))
		return row[:cut], row[cut:]

	def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name):
	''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments
	1) transforms one-hot-encoded columns into one column consisting of column names with string data type
	def corr_func(x, y, **kwargs):
		"""Annotate the current axes with the correlation coefficient of x and y.

		The coefficient is the off-diagonal entry of ``np.corrcoef(x, y)`` and is
		written as ``r = <value>`` with two decimals. Extra keyword arguments are
		accepted but not used.
		"""
		r = np.corrcoef(x, y)[0][1]
		ax = plt.gca()
		# xy is interpreted in the axes' own transform (transAxes), not data units
		ax.annotate("r = {:.2f}".format(r),
					xy=(.2, .8), xycoords=ax.transAxes,
					size=20)
	# set the plot size by calling figsize(), as the other plots in this file do;
	# assigning a tuple to the name would leave the size unset and shadow the helper
	figsize(16, 14)

	# plot the heatmap of the correlations with the value written in every cell
	colormap = plt.cm.RdBu
	sns.heatmap(correlations, linewidths=0.1,
				square=False, cmap=colormap, linecolor='white', annot=True)
	plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)
	px.scatter(gapminder_df,
	x="Income",
	y="Life Expectancy",
	size="Population",
	size_max=60,
	color="Region",
	hover_name="Country",
	animation_frame="Year",
	animation_group="Country",
	color_discrete_sequence=["#FF69B4", "#87CEFA",
	fig = px.scatter(gapminder_2018,
	x="Income",
	y="Life Expectancy",
	size="Population",
	size_max=60,
	color="Region",
	hover_name="Country",
	facet_col="Income Level",
	color_discrete_sequence=["#FF69B4", "#87CEFA",
	"#FFFF00", "#32CD32"],
	import matplotlib.pyplot as plt
	import pandas as pd
	from sklearn.model_selection import GridSearchCV
	from sklearn.datasets import load_boston
	from sklearn.linear_model import Ridge

	# load the data: feature matrix X and target vector y
	# NOTE(review): load_boston is no longer shipped in scikit-learn >= 1.2 — confirm the pinned version
	boston = load_boston()
	X, y = boston.data, boston.target