This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_list_of_K(K, dataframe): | |
'''inputs: K as integer and dataframe | |
apply k-means clustering to dataframe | |
and make a list of inertia values against 1 to K | |
return the inertia values list | |
''' | |
cluster_values = list(range(1, K+1)) | |
inertia_values=[] | |
for c in cluster_values: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def apply_log1p_transformation(dataframe, column):
    """Apply ``np.log1p`` to one column and store the result in the frame.

    The transformed values are written to a column named
    ``"log_" + column`` on ``dataframe`` itself (the input frame is
    modified in place; an existing column with that name is overwritten).

    Args:
        dataframe: Frame holding the column to transform.
        column: Name of the source column, as a string.

    Returns:
        The ``"log_" + column`` column of ``dataframe`` after the assignment.
    """
    log_column = "log_" + column
    dataframe[log_column] = np.log1p(dataframe[column])
    return dataframe[log_column]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge

# Load the data.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script needs scikit-learn < 1.2 (or a different dataset loader).
boston = load_boston()
X = boston.data    # feature matrix
y = boston.target  # regression target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig = px.scatter(gapminder_2018, | |
x="Income", | |
y="Life Expectancy", | |
size="Population", | |
size_max=60, | |
color="Region", | |
hover_name="Country", | |
facet_col="Income Level", | |
color_discrete_sequence=["#FF69B4", "#87CEFA", | |
"#FFFF00", "#32CD32"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
px.scatter(gapminder_df, | |
x="Income", | |
y="Life Expectancy", | |
size="Population", | |
size_max=60, | |
color="Region", | |
hover_name="Country", | |
animation_frame="Year", | |
animation_group="Country", | |
color_discrete_sequence=["#FF69B4", "#87CEFA", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Figure size (width, height). The original only bound this tuple to a name
# and never used it, so the size was not applied; pass it to a new figure.
figsize = (16, 14)
plt.figure(figsize=figsize)

# plot the heatmap of the correlation matrix, annotating each cell
colormap = plt.cm.RdBu
sns.heatmap(correlations, linewidths=0.1,
            square=False, cmap=colormap, linecolor='white', annot=True)
plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def corr_func(x, y, **kwargs):
    """Write the correlation coefficient of ``x`` and ``y`` on the current axes.

    The coefficient is taken from ``np.corrcoef(x, y)`` and drawn as the
    text ``"r = <value>"`` (two decimals) at axes-fraction position
    (0.2, 0.8) of the axes returned by ``plt.gca()``.

    Args:
        x: First sequence of values.
        y: Second sequence of values.
        **kwargs: Accepted and ignored.
            NOTE(review): presumably present so the function can be used as a
            plotting callback that passes extra keyword arguments; confirm
            against the caller.

    Returns:
        None. The function only annotates the current axes.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is corr(x, y).
    r = np.corrcoef(x, y)[0][1]
    ax = plt.gca()
    ax.annotate(
        "r = {:.2f}".format(r),
        xy=(.2, .8),
        xycoords=ax.transAxes,
        size=20,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_numbers_chars(row):
    """Split a string into its leading part and its trailing run of digits.

    Args:
        row: The string to split.

    Returns:
        A ``(head, tail)`` tuple where ``tail`` is the longest suffix of
        ``row`` made only of the characters ``0``-``9`` (possibly empty) and
        ``head`` is everything before it. ``head + tail == row`` always holds.
    """
    # Stripping the digits from the right leaves the non-numeric prefix;
    # whatever was stripped is the numeric suffix.
    head = row.rstrip('0123456789')
    tail = row[len(head):]
    return head, tail
def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name): | |
''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments | |
1) transforms one-hot-encoded columns into one column consisting of column names with string data type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# distribution of the cover type in different wilderness areas
figsize(14, 10)

# plot cover_type distribution for each wilderness area:
# one KDE curve per area, labelled with the area so a legend can tell them apart
for area in wilderness_areas:
    subset = trees[trees['Wilderness_Area_Type'] == area]
    sns.kdeplot(subset["Cover_Type"], label=area, linewidth=2)

# set title, legends and labels
plt.ylabel("Density")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set the plot size
figsize(14, 10)

# set the histogram, mean and median
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 (histplot is the
# suggested replacement); kept here to preserve the existing binning/output.
sns.distplot(trees["Cover_Type"], kde=False)
plt.axvline(x=trees.Cover_Type.mean(), linewidth=3, color='g', label="mean", alpha=0.5)
plt.axvline(x=trees.Cover_Type.median(), linewidth=3, color='y', label="median", alpha=0.5)

# set title, legends and labels
plt.xlabel("Cover_Type")