Skip to content

Instantly share code, notes, and snippets.

View naiborhujosua's full-sized avatar
👨‍💻

naiborhujosua naiborhujosua

👨‍💻
View GitHub Profile
from scipy.stats import sem
# Aggregate per hour of day: mean rented-bike count for each 'Hour' value.
data_mean = data.groupby('Hour')["Rented Bike Count"].mean()
# 1.96 * standard error of the mean per hour, i.e. the half-width of a
# normal-approximation 95% confidence interval around each hourly mean.
data_se = data.groupby('Hour')["Rented Bike Count"].apply(sem).mul(1.96)
# Plot
plt.figure(figsize=(16,10), dpi= 80)
# NOTE(review): the label reads "# Hours", but the grouped values are rented
# bike counts and 'Hour' is what indexes x below — confirm the intended label.
plt.ylabel("# Hours", fontsize=16)
# x positions: the distinct 'Hour' values that index the grouped means.
x = data_mean.index
# Draw Plot: one 12x7 inch figure whose axes are created with an equal aspect ratio.
fig, ax = plt.subplots(figsize=(12, 7), subplot_kw=dict(aspect="equal"), dpi= 80)
# Prepare Data: number of rows per season, as a frame with the columns
# 'Seasons' and 'counts'.
df = data.groupby('Seasons').size().reset_index(name='counts')
data_df = df['counts']
categories = df['Seasons']
# Four entries with only the last one non-zero; presumably per-wedge offsets
# for a four-season pie chart — the consuming call is not visible here, confirm.
explode = [0,0,0,0.1]
# Draw Plot: box plot of 'Rented Bike Count', one box per 'Seasons' value.
plt.figure(figsize=(13,10), dpi= 80)
# Notches are explicitly disabled for the boxes.
sns.boxplot(x='Seasons', y='Rented Bike Count', data=data, notch=False)
# Add N Obs inside boxplot (optional)
def add_n_obs(df,group_col,y):
medians_dict = {grp[0]:grp[1][y].median() for grp in df.groupby(group_col)}
xticklabels = [x.get_text() for x in plt.gca().get_xticklabels()]
n_obs = df.groupby(group_col)[y].size().values
for (x, xticklabel), n_ob in zip(enumerate(xticklabels), n_obs):
@naiborhujosua
naiborhujosua / densityplotwithcurves.py
Created July 26, 2022 03:43
Density Plot with Curves
# Draw Plot: overlay the 'Rented Bike Count' distributions (histogram plus
# KDE curve) for non-holidays and holidays on a single figure.
plt.figure(figsize=(13,10), dpi= 80)
# Each 'Holiday' value doubles as its legend label, so label and filter cannot
# drift apart (the original label carried a stray apostrophe: "No Holiday'").
# NOTE: sns.distplot is deprecated in recent seaborn releases; it is kept here
# so the rendered output stays the same on the seaborn version in use.
for holiday_value, curve_color in (("No Holiday", "dodgerblue"), ("Holiday", "orange")):
    sns.distplot(
        data.loc[data['Holiday'] == holiday_value, "Rented Bike Count"],
        color=curve_color,
        label=holiday_value,
        hist_kws={'alpha': .7},
        kde_kws={'linewidth': 3},
    )
# Clip the density axis so both curves stay readable.
plt.ylim(0, 0.004)
# Decoration
plt.title('Density Plot of Rented Bike Count by Holiday', fontsize=22)
plt.legend()
plt.show()
# Draw Plot: one shaded KDE curve of 'Rented Bike Count' per season.
plt.figure(figsize=(16,10), dpi= 80)
# (season, colour) pairs, drawn in this order so later curves overlay earlier ones.
season_colors = (
    ("Winter", "g"),
    ("Spring", "deeppink"),
    ("Summer", "dodgerblue"),
    ("Autumn", "orange"),
)
for season, curve_color in season_colors:
    sns.kdeplot(
        data.loc[data['Seasons'] == season, "Rented Bike Count"],
        shade=True,
        color=curve_color,
        label=season,
        alpha=.7,
    )
# Decoration
# The title now describes what is plotted; the previous text
# ('City Mileage by n_Cylinders') was left over from a different dataset.
plt.title('Density Plot of Rented Bike Count by Seasons', fontsize=22)
plt.legend()
@naiborhujosua
naiborhujosua / Lolipopbarchart.py
Created July 26, 2022 03:40
Lollipop Bar Chart
# Prepare Data: mean rented bike count per season, sorted ascending, with
# 'Seasons' restored as a regular column and a fresh 0..n-1 index.
# Selecting the numeric column before aggregating avoids averaging the string
# 'Seasons' column, which raises TypeError on pandas >= 2.0.
df = (
    data.groupby('Seasons')[['Rented Bike Count']]
    .mean()
    .sort_values('Rented Bike Count')
    .reset_index()
)
# Draw plot
fig, ax = plt.subplots(figsize=(16,10), dpi= 80)
# Thin stems from the baseline up to each seasonal mean ...
ax.vlines(x=df.index, ymin=0, ymax=df['Rented Bike Count'], color='firebrick', alpha=0.7, linewidth=2)
# ... topped by a dot, which together form the "lollipop".
ax.scatter(x=df.index, y=df['Rented Bike Count'], s=80, color='firebrick', alpha=0.7)
# Title, Label, Ticks and Ylim
@naiborhujosua
naiborhujosua / barchart.py
Created July 26, 2022 03:39
Ordered Bar Chart
# Prepare Data: mean rented bike count per season, sorted ascending, with
# 'Seasons' restored as a regular column and a fresh 0..n-1 index.
# Selecting the numeric column before aggregating avoids averaging the string
# 'Seasons' column, which raises TypeError on pandas >= 2.0.
df = (
    data.groupby('Seasons')[['Rented Bike Count']]
    .mean()
    .sort_values('Rented Bike Count')
    .reset_index()
)
# Draw plot
import matplotlib.patches as patches
fig, ax = plt.subplots(figsize=(16,10), facecolor='white', dpi= 80)
# Very thick vertical lines (linewidth=20) stand in for the bars.
ax.vlines(x=df.index, ymin=0, ymax=df['Rented Bike Count'], color='firebrick', alpha=0.7, linewidth=20)
@naiborhujosua
naiborhujosua / timeseriesplot.py
Created July 26, 2022 03:38
Time Series Plot
# Prepare Data: first 100 rows of the CSV at DATA_DIR, asking pandas to parse
# the 'Date' column as dates.
data_df = pd.read_csv(DATA_DIR, encoding= 'unicode_escape', parse_dates=['Date']).head(100)
# One x position per row.
x = np.arange(data_df.shape[0])
# Row-over-row percentage change in 'Rented Bike Count'; the first row has no
# predecessor, so its NaN is replaced by 0.
# NOTE(review): a previous count of 0 with a non-zero change yields +/-inf,
# which fillna does not replace — confirm this cannot occur in the data.
y_returns = (data_df['Rented Bike Count'].diff().fillna(0)/data_df['Rented Bike Count'].shift(1)).fillna(0) * 100
# Plot
plt.figure(figsize=(16,10), dpi= 80)
# Shade the area between the curve and zero: green where the change is
# non-negative, red where it is non-positive. The first point is skipped.
plt.fill_between(x[1:], y_returns[1:], 0, where=y_returns[1:] >= 0, facecolor='green', interpolate=True, alpha=0.7)
plt.fill_between(x[1:], y_returns[1:], 0, where=y_returns[1:] <= 0, facecolor='red', interpolate=True, alpha=0.7)
# Plot: pairwise scatter matrix of the numeric columns, coloured by season.
# sns.pairplot is a figure-level function that creates its own figure, so no
# plt.figure() call precedes it: a separately created figure would stay empty
# and pop up as a blank window on plt.show().
sns.pairplot(
    data[["Rented Bike Count", "Temperature(°C)", "Seasons", "Wind speed (m/s)"]],
    kind="scatter",
    hue="Seasons",
    plot_kws=dict(s=80, edgecolor="white", linewidth=2.5),
)
plt.show()
# As many colors as there are unique values in data['Seasons'], spread evenly
# across the tab10 colormap.
categories = np.unique(data['Seasons'])
# max(..., 1) keeps the denominator non-zero when only one category exists.
colors = [plt.cm.tab10(i / float(max(len(categories) - 1, 1))) for i in range(len(categories))]
# Step 2: Draw Scatterplot with unique color for each category
fig = plt.figure(figsize=(16, 10), dpi= 80, facecolor='w', edgecolor='k')
for i, category in enumerate(categories):
    # `color=` rather than `c=`: a bare RGBA tuple passed as `c` is ambiguous
    # and gets value-mapped when a category happens to contain four points.
    plt.scatter(
        'Temperature(°C)',
        'Rented Bike Count',
        data=data.loc[data.Seasons == category, :],
        s=20,
        color=colors[i],
        label=str(category),
        edgecolors='black',
        linewidths=.5,
    )