Ceren cereniyim

## make_list_of_K.py
def make_list_of_K(K, dataframe):
  '''inputs: K as integer and dataframe
  apply k-means clustering to dataframe
  and make a list of inertia values against 1 to K
  return the inertia values list
  '''
    cluster_values = list(range(1, K+1))
    inertia_values=[]

    for c in cluster_values:

## logarithm_transformation.py
def apply_log1p_transformation(dataframe, column):
    '''Store a log1p-transformed copy of a column and return it.

    The transformed values are written onto ``dataframe`` itself under the
    name ``"log_" + column``; the source column is not modified.

    dataframe: the dataframe that holds ``column``
    column: name of the column to transform, given as a string
    returns: the newly stored ``"log_" + column`` column
    '''
    log_column = "log_" + column
    dataframe[log_column] = np.log1p(dataframe[column])
    return dataframe[log_column]

## grid_search_visualization.py
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge

# load the data
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases — confirm the installed version still provides it.
boston = load_boston()
# features and target are read from the loaded dataset object
X = boston.data
y = boston.target

## factfulness_bubble_chart_2018.py
fig = px.scatter(gapminder_2018,
                 x="Income",
                 y="Life Expectancy",
                 size="Population",
                 size_max=60,
                 color="Region",
                 hover_name="Country",
                 facet_col="Income Level",
                 color_discrete_sequence=["#FF69B4", "#87CEFA",
                                          "#FFFF00", "#32CD32"],

## animated_bubble_chart.py
px.scatter(gapminder_df,
           x="Income",
           y="Life Expectancy",
           size="Population",
           size_max=60,
           color="Region",
           hover_name="Country",
           animation_frame="Year",
           animation_group="Country",
           color_discrete_sequence=["#FF69B4", "#87CEFA",

## correlations_heatmap.py
# Size the figure explicitly: a bare ``figsize = (16, 14)`` assignment is never
# read by matplotlib or seaborn, and it would shadow the ``figsize(...)``
# helper that the other plotting snippets call.
plt.figure(figsize=(16, 14))

# plot the heatmap of the correlation matrix, writing each value in its cell
colormap = plt.cm.RdBu
sns.heatmap(correlations, linewidths=0.1,
            square=False, cmap=colormap, linecolor='white', annot=True)
plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)

## correlation_coefficients.py
def corr_func(x, y, **kwargs):
    '''Write the correlation coefficient of x and y onto the current axes.

    The coefficient is the off-diagonal entry of ``np.corrcoef(x, y)``; it is
    drawn as "r = <value>" with two decimals at position (.2, .8) in the
    coordinate system of the current axes.

    Extra keyword arguments are accepted but not used. Nothing is returned.
    '''
    coefficient = np.corrcoef(x, y)[0, 1]
    axes = plt.gca()
    label = "r = {:.2f}".format(coefficient)
    axes.annotate(label, xy=(.2, .8), xycoords=axes.transAxes, size=20)


## reverse_one_hot_enocde.py
def split_numbers_chars(row):
    '''Split a string into its leading part and its trailing run of digits.

    Returns a (head, tail) tuple: tail is the run of characters from
    '0123456789' at the end of the string (empty when there is none) and
    head is everything in front of it.
    '''
    cut = len(row.rstrip('0123456789'))
    return row[:cut], row[cut:]

def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name):
    ''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments
    1) transforms one-hot-encoded columns into one column consisting of column names with string data type

## kde_plot_target.py
# distribution of the cover type in different wilderness areas
# set the plot size
# NOTE(review): figsize is not defined in this snippet — presumably the
# IPython pylab helper; confirm the import.
figsize(14,10)

# plot cover_type distribution for each wilderness area:
# one KDE curve per area, labelled with the area name
for area in wilderness_areas:
    # select the entries of trees whose Wilderness_Area_Type equals this area
    subset = trees[trees['Wilderness_Area_Type'] == area]
    sns.kdeplot(subset["Cover_Type"], label=area, linewidth=2)

# set title, legends and labels
plt.ylabel("Density")

## distribution_plot.py
# set the plot size
# NOTE(review): figsize is not defined in this snippet — presumably the
# IPython pylab helper; confirm the import.
figsize(14,10)

# set the histogram, mean and median
# kde=False: request the histogram without a density curve
# NOTE(review): sns.distplot is deprecated in newer seaborn releases —
# confirm against the installed version before reuse.
sns.distplot(trees["Cover_Type"], kde=False)
# vertical reference lines at the mean (green) and the median (yellow)
plt.axvline(x=trees.Cover_Type.mean(), linewidth=3, color='g', label="mean", alpha=0.5)
plt.axvline(x=trees.Cover_Type.median(), linewidth=3, color='y', label="median", alpha=0.5)

# set title, legends and labels
plt.xlabel("Cover_Type")
	def make_list_of_K(K, dataframe):
	'''inputs: K as integer and dataframe
	apply k-means clustering to dataframe
	and make a list of inertia values against 1 to K
	return the inertia values list
	'''
	cluster_values = list(range(1, K+1))
	inertia_values=[]

	for c in cluster_values:
	def apply_log1p_transformation(dataframe, column):
		'''Store a log1p-transformed copy of a column and return it.

		The transformed values are written onto ``dataframe`` itself under the
		name ``"log_" + column``; the source column is not modified.

		dataframe: the dataframe that holds ``column``
		column: name of the column to transform, given as a string
		returns: the newly stored ``"log_" + column`` column
		'''
		log_column = "log_" + column
		dataframe[log_column] = np.log1p(dataframe[column])
		return dataframe[log_column]
	import matplotlib.pyplot as plt
	import pandas as pd
	from sklearn.model_selection import GridSearchCV
	from sklearn.datasets import load_boston
	from sklearn.linear_model import Ridge

	# load the data
	# NOTE(review): load_boston is no longer shipped by recent scikit-learn
	# releases — confirm the installed version still provides it.
	boston = load_boston()
	# features and target are read from the loaded dataset object
	X = boston.data
	y = boston.target
	fig = px.scatter(gapminder_2018,
	x="Income",
	y="Life Expectancy",
	size="Population",
	size_max=60,
	color="Region",
	hover_name="Country",
	facet_col="Income Level",
	color_discrete_sequence=["#FF69B4", "#87CEFA",
	"#FFFF00", "#32CD32"],
	px.scatter(gapminder_df,
	x="Income",
	y="Life Expectancy",
	size="Population",
	size_max=60,
	color="Region",
	hover_name="Country",
	animation_frame="Year",
	animation_group="Country",
	color_discrete_sequence=["#FF69B4", "#87CEFA",
	# Size the figure explicitly: a bare ``figsize = (16, 14)`` assignment is
	# never read by matplotlib or seaborn, and it would shadow the
	# ``figsize(...)`` helper that the other plotting snippets call.
	plt.figure(figsize=(16, 14))

	# plot the heatmap of the correlation matrix, writing each value in its cell
	colormap = plt.cm.RdBu
	sns.heatmap(correlations, linewidths=0.1,
		square=False, cmap=colormap, linecolor='white', annot=True)
	plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)
	def corr_func(x, y, **kwargs):
		'''Write the correlation coefficient of x and y onto the current axes.

		The coefficient is the off-diagonal entry of ``np.corrcoef(x, y)``; it
		is drawn as "r = <value>" with two decimals at position (.2, .8) in the
		coordinate system of the current axes.

		Extra keyword arguments are accepted but not used. Nothing is returned.
		'''
		r = np.corrcoef(x, y)[0][1]
		ax = plt.gca()
		ax.annotate("r = {:.2f}".format(r),
			xy=(.2, .8), xycoords=ax.transAxes,
			size=20)
	def split_numbers_chars(row):
		'''Split a string into its leading part and its trailing run of digits.

		Returns a (head, tail) tuple: tail is the run of characters from
		'0123456789' at the end of the string (empty when there is none) and
		head is everything in front of it.
		'''
		head = row.rstrip('0123456789')
		tail = row[len(head):]
		return head, tail

	def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name):
	''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments
	1) transforms one-hot-encoded columns into one column consisting of column names with string data type
	# distribution of the cover type in different wilderness areas
	# set the plot size
	# NOTE(review): figsize is not defined in this snippet — presumably the
	# IPython pylab helper; confirm the import.
	figsize(14,10)

	# plot cover_type distribution for each wilderness area:
	# one KDE curve per area, labelled with the area name
	for area in wilderness_areas:
		# select the entries of trees whose Wilderness_Area_Type equals this area
		subset = trees[trees['Wilderness_Area_Type'] == area]
		sns.kdeplot(subset["Cover_Type"], label=area, linewidth=2)

	# set title, legends and labels
	plt.ylabel("Density")
	# set the plot size
	# NOTE(review): figsize is not defined in this snippet — presumably the
	# IPython pylab helper; confirm the import.
	figsize(14,10)

	# set the histogram, mean and median
	# kde=False: request the histogram without a density curve
	# NOTE(review): sns.distplot is deprecated in newer seaborn releases —
	# confirm against the installed version before reuse.
	sns.distplot(trees["Cover_Type"], kde=False)
	# vertical reference lines at the mean (green) and the median (yellow)
	plt.axvline(x=trees.Cover_Type.mean(), linewidth=3, color='g', label="mean", alpha=0.5)
	plt.axvline(x=trees.Cover_Type.median(), linewidth=3, color='y', label="median", alpha=0.5)

	# set title, legends and labels
	plt.xlabel("Cover_Type")