This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Violin plot of life expectancy per continent, with a title.
# scale="count" sizes each violin's width by the number of observations in
# its group; inner=None leaves the violins without interior marks.
sns.violinplot(
    x="continent",
    y="life_expectancy",
    data=df,
    palette="Set3",
    order=["Africa", "Asia", "Americas", "Europe", "Oceania"],
    inner=None,
    scale="count",
)
plt.title("Violinplot of Life Expectancy Among Continents Between 1952 and 2007")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def outlier_function(df, col_name): | |
''' this function detects first and third quartile and interquartile range for a given column of a dataframe | |
then calculates upper and lower limits to determine outliers conservatively | |
returns the number of lower and uper limit and number of outliers respectively | |
''' | |
first_quartile = np.percentile(np.array(df[col_name].tolist()), 25) | |
third_quartile = np.percentile(np.array(df[col_name].tolist()), 75) | |
IQR = third_quartile - first_quartile | |
upper_limit = third_quartile+(3*IQR) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set the plot size
# NOTE(review): figsize is not defined in this snippet; presumably it comes
# from the notebook's pylab namespace -- confirm.
figsize(14, 10)

# histogram of Cover_Type (KDE curve disabled) with vertical lines marking
# the mean (green) and the median (yellow)
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in
# favour of histplot/displot -- confirm against the installed version.
sns.distplot(trees["Cover_Type"], kde=False)
plt.axvline(
    x=trees.Cover_Type.mean(), linewidth=3, color="g", label="mean", alpha=0.5
)
plt.axvline(
    x=trees.Cover_Type.median(), linewidth=3, color="y", label="median", alpha=0.5
)

# set title, legends and labels
plt.xlabel("Cover_Type")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# distribution of the cover type in different wilderness areas
figsize(14, 10)

# draw one Cover_Type density curve per wilderness area, labelled by area
for area in wilderness_areas:
    # keep only the rows that belong to the current wilderness area
    subset = trees[trees["Wilderness_Area_Type"] == area]
    sns.kdeplot(subset["Cover_Type"], label=area, linewidth=2)

# set title, legends and labels
plt.ylabel("Density")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_numbers_chars(row):
    """Split a string into its leading text and its trailing run of digits.

    Args:
        row: The string to split, e.g. ``"Soil_Type12"``.

    Returns:
        A ``(head, tail)`` tuple. ``tail`` is the run of ASCII digits at the
        end of ``row`` (an empty string when there is none) and ``head`` is
        everything that precedes it.
    """
    # Strip the trailing digits to find the head; whatever was stripped
    # off is the tail.
    head = row.rstrip("0123456789")
    tail = row[len(head):]
    return head, tail
def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name): | |
''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments | |
1) transforms one-hot-encoded columns into one column consisting of column names with string data type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def corr_func(x, y, **kwargs):
    """Annotate the current axes with the correlation coefficient of x and y.

    Args:
        x: First sequence of numeric values.
        y: Second sequence of numeric values, paired with ``x``.
        **kwargs: Accepted but not used by this function.
            NOTE(review): presumably present so the function can be used as
            a plotting-grid callback that passes extra keywords -- confirm.

    Returns:
        None. The text ``r = <value>`` (two decimals) is drawn on the
        current matplotlib axes.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is corr(x, y).
    r = np.corrcoef(x, y)[0][1]
    ax = plt.gca()
    # Position is given in axes-fraction coordinates (transAxes), so the
    # label lands at the same relative spot whatever the data range.
    ax.annotate(
        "r = {:.2f}".format(r),
        xy=(0.2, 0.8),
        xycoords=ax.transAxes,
        size=20,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the figure size. The previous `figsize=(16,14)` only bound a tuple to
# a variable called figsize, which had no effect on the plot; create the
# figure with the intended size explicitly instead.
plt.figure(figsize=(16, 14))

# plot the heatmap of the correlation matrix with the coefficient printed
# in every cell
colormap = plt.cm.RdBu
sns.heatmap(
    correlations,
    linewidths=0.1,
    square=False,
    cmap=colormap,
    linecolor="white",
    annot=True,
)
plt.title("Pearson Correlation of Features with Numeric Soil_Type", size=14)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
px.scatter(gapminder_df, | |
x="Income", | |
y="Life Expectancy", | |
size="Population", | |
size_max=60, | |
color="Region", | |
hover_name="Country", | |
animation_frame="Year", | |
animation_group="Country", | |
color_discrete_sequence=["#FF69B4", "#87CEFA", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig = px.scatter(gapminder_2018, | |
x="Income", | |
y="Life Expectancy", | |
size="Population", | |
size_max=60, | |
color="Region", | |
hover_name="Country", | |
facet_col="Income Level", | |
color_discrete_sequence=["#FF69B4", "#87CEFA", | |
"#FFFF00", "#32CD32"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# load the data
# NOTE(review): load_boston has been removed from recent scikit-learn
# releases; this import only works on older versions -- confirm the pinned
# version or switch to another dataset.
boston = load_boston()
X = boston.data  # feature matrix
y = boston.target  # regression target