Skip to content

Instantly share code, notes, and snippets.

View cereniyim's full-sized avatar

Ceren cereniyim

View GitHub Profile
@cereniyim
cereniyim / Violinplot_scale.py
Last active October 15, 2019 14:32
Violinplot with Scale
# draw one violin per continent, in a fixed continent order;
# scale="count" sizes each violin by the number of observations in its group
# and inner=None leaves the inside of the violins empty
sns.violinplot(
    data=df,
    x="continent",
    y="life_expectancy",
    order=["Africa", "Asia", "Americas", "Europe", "Oceania"],
    scale="count",
    inner=None,
    palette="Set3",
)
# title of the figure
plt.title("Violinplot of Life Expectancy Among Continents Between 1952 and 2007")
@cereniyim
cereniyim / outlier_function.py
Created January 8, 2020 12:56
Extreme outlier detection
def outlier_function(df, col_name):
''' this function detects first and third quartile and interquartile range for a given column of a dataframe
then calculates upper and lower limits to determine outliers conservatively
returns the lower and upper limits and the number of outliers, respectively
'''
first_quartile = np.percentile(np.array(df[col_name].tolist()), 25)
third_quartile = np.percentile(np.array(df[col_name].tolist()), 75)
IQR = third_quartile - first_quartile
upper_limit = third_quartile+(3*IQR)
@cereniyim
cereniyim / distribution_plot.py
Created January 8, 2020 13:02
Distribution plot with mean and median
# figure dimensions
figsize(14, 10)

# histogram of Cover_Type without the KDE curve
sns.distplot(trees["Cover_Type"], kde=False)

# semi-transparent vertical markers at the mean (green) and median (yellow)
plt.axvline(
    x=trees["Cover_Type"].mean(),
    color='g', linewidth=3, alpha=0.5, label="mean",
)
plt.axvline(
    x=trees["Cover_Type"].median(),
    color='y', linewidth=3, alpha=0.5, label="median",
)

# axis labelling
plt.xlabel("Cover_Type")
@cereniyim
cereniyim / kde_plot_target.py
Created January 8, 2020 13:05
KDE plot with wilderness area
# compare the Cover_Type distribution across the wilderness areas
figsize(14, 10)

# one KDE curve per wilderness area, labelled with the area it belongs to
for area in wilderness_areas:
    # keep only the rows recorded for this area
    subset = trees.loc[trees['Wilderness_Area_Type'] == area]
    sns.kdeplot(subset["Cover_Type"], linewidth=2, label=area)

# axis labelling
plt.ylabel("Density")
@cereniyim
cereniyim / reverse_one_hot_enocde.py
Created January 8, 2020 13:07
Split numbers and reverse one hot encode
def split_numbers_chars(row: str) -> tuple:
    '''Split a string into its leading text and its trailing run of digits.

    Only the ASCII digits 0-9 at the very end of the string are split off;
    digits that appear earlier stay in the leading part.

    Parameters:
        row: the string to split, e.g. "Soil_Type12"

    Returns:
        A (head, tail) tuple where head is everything before the trailing
        digits and tail is the trailing digits themselves. tail is "" when
        the string does not end in a digit, and head is "" when the string
        consists of digits only.
    '''
    # strip the trailing digits to find where the numeric suffix starts
    head = row.rstrip('0123456789')
    # whatever was stripped off is the numeric suffix
    tail = row[len(head):]
    return head, tail
def reverse_one_hot_encode(dataframe, start_loc, end_loc, numeric_column_name):
''' this function takes the start and end location of the one-hot-encoded column set and numeric column name to be created as arguments
1) transforms one-hot-encoded columns into one column consisting of column names with string data type
@cereniyim
cereniyim / correlation_coefficients.py
Created January 8, 2020 13:10
Function to calculate correlation coefficient between two columns
def corr_func(x, y, **kwargs):
    """Write the correlation coefficient of x and y onto the current axes.

    The coefficient is taken from the off-diagonal entry of
    np.corrcoef(x, y) and drawn as "r = <value>" with two decimals, at
    (0.2, 0.8) in axes-fraction coordinates.

    Extra keyword arguments are accepted and ignored
    (presumably so the function can be passed as a plotting-grid callback
    -- confirm against the caller).
    """
    coefficient = np.corrcoef(x, y)[0, 1]
    axes = plt.gca()
    label = "r = {:.2f}".format(coefficient)
    axes.annotate(label, xy=(.2, .8), xycoords=axes.transAxes, size=20)
@cereniyim
cereniyim / correlations_heatmap.py
Created January 8, 2020 13:13
plot correlations heatmap
# open the figure at 16x14 inches; the size has to be handed to plt.figure,
# because assigning a tuple to a variable named figsize does not resize anything
plt.figure(figsize=(16, 14))
# plot the heatmap: diverging red/blue colormap, thin white lines between
# cells and the coefficient printed in every cell
colormap = plt.cm.RdBu
sns.heatmap(correlations, linewidths=0.1,
            square=False, cmap=colormap, linecolor='white', annot=True)
plt.title('Pearson Correlation of Features with Numeric Soil_Type', size=14)
@cereniyim
cereniyim / animated_bubble_chart.py
Created January 17, 2020 10:02
Gapminder's animated bubble chart
px.scatter(gapminder_df,
x="Income",
y="Life Expectancy",
size="Population",
size_max=60,
color="Region",
hover_name="Country",
animation_frame="Year",
animation_group="Country",
color_discrete_sequence=["#FF69B4", "#87CEFA",
@cereniyim
cereniyim / factfulness_bubble_chart_2018.py
Last active January 19, 2020 09:11
Factfulness Bubble Chart for 2018
fig = px.scatter(gapminder_2018,
x="Income",
y="Life Expectancy",
size="Population",
size_max=60,
color="Region",
hover_name="Country",
facet_col="Income Level",
color_discrete_sequence=["#FF69B4", "#87CEFA",
"#FFFF00", "#32CD32"],
@cereniyim
cereniyim / grid_search_visualization.py
Last active January 29, 2020 20:15
Grid Search applied to Ridge Model
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge
# load the data
# NOTE(review): load_boston was deprecated and later removed in newer
# scikit-learn releases -- confirm the pinned version still provides it
boston = load_boston()
# feature matrix and target vector
X, y = boston.data, boston.target