naiborhujosua naiborhujosua

## parallelcoordinates.py
from scipy.stats import sem

# Import Data
# Group the rental counts by hour once, then derive the per-hour mean and an
# error-band half-width of 1.96 standard errors of the mean (the usual
# normal-approximation 95% band).
rentals_by_hour = data.groupby('Hour')["Rented Bike Count"]
data_mean = rentals_by_hour.mean()
data_se = rentals_by_hour.apply(sem).mul(1.96)

# Plot
plt.figure(figsize=(16,10), dpi= 80)
# The plotted values are mean rented-bike counts; the hours are the x values
# below, so the y-axis label names the count rather than the hours.
plt.ylabel("# Rented Bikes", fontsize=16)
x = data_mean.index

## piechart.py
# Prepare Data
# Count the rows belonging to each season; the result has one row per season
# with the columns 'Seasons' and 'counts'.
df = (
    data.groupby('Seasons')
    .size()
    .reset_index(name='counts')
)
categories = df['Seasons']
data_df = df['counts']
# One offset per wedge: only the fourth entry is non-zero.
# NOTE(review): presumably passed to ax.pie(explode=...) further down; the
# hard-coded length assumes exactly four seasons - confirm.
explode = [0, 0, 0, 0.1]

# Draw Plot
# Equal aspect ratio so the pie is drawn as a circle.
fig, ax = plt.subplots(
    figsize=(12, 7),
    subplot_kw={"aspect": "equal"},
    dpi=80,
)

## boxplot.py
# Draw Plot
# One box per value of 'Seasons', summarising the 'Rented Bike Count' values
# of the rows in that season. The boxes are drawn on a new 13x10 inch figure.
plt.figure(figsize=(13,10), dpi= 80)
sns.boxplot(x='Seasons', y='Rented Bike Count', data=data, notch=False)

# Add N Obs inside boxplot (optional)
def add_n_obs(df,group_col,y):
    medians_dict = {grp[0]:grp[1][y].median() for grp in df.groupby(group_col)}
    xticklabels = [x.get_text() for x in plt.gca().get_xticklabels()]
    n_obs = df.groupby(group_col)[y].size().values
    for (x, xticklabel), n_ob in zip(enumerate(xticklabels), n_obs):

## densityplotwithcurves.py
# Draw Plot
# Overlay the rental-count distribution (histogram + KDE curve) of the rows
# without a holiday and the rows with a holiday on one figure.
plt.figure(figsize=(13,10), dpi= 80)
for holiday_value, curve_color in (('No Holiday', 'dodgerblue'), ('Holiday', 'orange')):
    # The legend label is the 'Holiday' column value itself; the previous
    # label for the first series carried a stray trailing apostrophe.
    sns.distplot(
        data.loc[data['Holiday'] == holiday_value, "Rented Bike Count"],
        color=curve_color,
        label=holiday_value,
        hist_kws={'alpha':.7},
        kde_kws={'linewidth':3},
    )
# Fix the density axis range so both curves share the same vertical scale.
plt.ylim(0, 0.004)

# Decoration
plt.title('Density Plot of Rented Bike Count by Holiday', fontsize=22)
plt.legend()
plt.show()

## densityplot.py
# Draw Plot
# One filled KDE curve of 'Rented Bike Count' per season, all on one figure.
plt.figure(figsize=(16,10), dpi= 80)
season_colors = (
    ('Winter', 'g'),
    ('Spring', 'deeppink'),
    ('Summer', 'dodgerblue'),
    ('Autumn', 'orange'),
)
for season, curve_color in season_colors:
    sns.kdeplot(
        data.loc[data['Seasons'] == season, "Rented Bike Count"],
        shade=True,
        color=curve_color,
        label=season,
        alpha=.7,
    )

# Decoration
# The title names what is actually plotted (rental counts split by season);
# the previous text described a different dataset.
plt.title('Density Plot of Rented Bike Count by Seasons', fontsize=22)
plt.legend()

## Lolipopbarchart.py
# Prepare Data
# Mean rental count per season, sorted ascending, with 'Seasons' turned back
# into a regular column so the index is the 0..n-1 plotting position.
# The mean is taken over the numeric column only: averaging the whole group
# frame (including the string 'Seasons' column) raises TypeError on pandas 2.x.
df = (
    data.groupby('Seasons')[['Rented Bike Count']]
    .mean()
    .sort_values('Rented Bike Count')
    .reset_index()
)

# Draw plot
# Lollipop: a thin stem from 0 up to each mean, topped by a dot.
fig, ax = plt.subplots(figsize=(16,10), dpi= 80)
ax.vlines(x=df.index, ymin=0, ymax=df['Rented Bike Count'], color='firebrick', alpha=0.7, linewidth=2)
ax.scatter(x=df.index, y=df['Rented Bike Count'], s=80, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Ylim

## barchart.py
# Prepare Data
# Mean rental count per season, sorted ascending, with 'Seasons' turned back
# into a regular column so the index is the 0..n-1 plotting position.
# The mean is taken over the numeric column only: averaging the whole group
# frame (including the string 'Seasons' column) raises TypeError on pandas 2.x.
df = (
    data.groupby('Seasons')[['Rented Bike Count']]
    .mean()
    .sort_values('Rented Bike Count')
    .reset_index()
)

# Draw plot
import matplotlib.patches as patches

fig, ax = plt.subplots(figsize=(16,10), facecolor='white', dpi= 80)
# Bars are drawn as very thick vertical lines from 0 up to each mean.
ax.vlines(x=df.index, ymin=0, ymax=df['Rented Bike Count'], color='firebrick', alpha=0.7, linewidth=20)

## timeseriesplot.py
# Prepare Data
# Only the first 100 rows of the CSV are kept; 'Date' is parsed as a date column.
data_df = pd.read_csv(DATA_DIR, encoding= 'unicode_escape', parse_dates=['Date']).head(100)
x = np.arange(len(data_df))
# Row-over-row change of the rental count, expressed as a percentage of the
# previous row's count. NaN results (e.g. the first row, which has no
# predecessor) are replaced by 0.
y_returns = (
    data_df['Rented Bike Count'].diff().fillna(0)
    .div(data_df['Rented Bike Count'].shift(1))
    .fillna(0)
    .mul(100)
)

# Plot
plt.figure(figsize=(16,10), dpi= 80)
# The first row is skipped. Non-negative changes are shaded green and
# non-positive changes red, both measured from the zero line.
for region_mask, region_color in ((y_returns[1:] >= 0, 'green'), (y_returns[1:] <= 0, 'red')):
    plt.fill_between(x[1:], y_returns[1:], 0, where=region_mask, facecolor=region_color, interpolate=True, alpha=0.7)

## marginal plot.py
# Plot
# Pairwise scatter plots of rental count, temperature and wind speed, coloured
# by season. sns.pairplot is a figure-level function that creates its own
# figure, so no plt.figure() call precedes it: a separate call would only
# leave behind an additional empty figure.
sns.pairplot(data[["Rented Bike Count","Temperature(°C)","Seasons","Wind speed (m/s)"]], kind="scatter", hue="Seasons", plot_kws=dict(s=80, edgecolor="white", linewidth=2.5))
plt.show()

## Encircleplot.py
# As many colors as there are unique data['Seasons'] values
categories = np.unique(data['Seasons'])
# Spread the categories evenly over the tab10 colormap. The divisor is clamped
# to at least 1 so a dataset with a single category does not divide by zero.
color_steps = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_steps)) for i in range(len(categories))]

# Step 2: Draw Scatterplot with unique color for each category
fig = plt.figure(figsize=(16, 10), dpi= 80, facecolor='w', edgecolor='k')

for i, category in enumerate(categories):
    # The color is passed as a one-row list so matplotlib treats it as a single
    # RGBA color for every point; a bare 4-tuple is ambiguous and could be
    # read as four data values when a category has exactly four rows.
    plt.scatter('Temperature(°C)', 'Rented Bike Count', data=data.loc[data.Seasons==category, :], s=20, c=[colors[i]], label=str(category), edgecolors='black', linewidths=.5)
	from scipy.stats import sem

	# Import Data
	data_mean = data.groupby('Hour')["Rented Bike Count"].mean()
	data_se = data.groupby('Hour')["Rented Bike Count"].apply(sem).mul(1.96)

	# Plot
	plt.figure(figsize=(16,10), dpi= 80)
	plt.ylabel("# Hours", fontsize=16)
	x = data_mean.index
	# Draw Plot
	fig, ax = plt.subplots(figsize=(12, 7), subplot_kw=dict(aspect="equal"), dpi= 80)

	# Prepare Data
	df = data.groupby('Seasons').size().reset_index(name='counts')

	data_df = df['counts']
	categories = df['Seasons']
	explode = [0,0,0,0.1]
	# Draw Plot
	plt.figure(figsize=(13,10), dpi= 80)
	sns.boxplot(x='Seasons', y='Rented Bike Count', data=data, notch=False)

	# Add N Obs inside boxplot (optional)
	def add_n_obs(df,group_col,y):
	medians_dict = {grp[0]:grp[1][y].median() for grp in df.groupby(group_col)}
	xticklabels = [x.get_text() for x in plt.gca().get_xticklabels()]
	n_obs = df.groupby(group_col)[y].size().values
	for (x, xticklabel), n_ob in zip(enumerate(xticklabels), n_obs):
	# Draw Plot
	plt.figure(figsize=(13,10), dpi= 80)
	sns.distplot(data.loc[data['Holiday'] == 'No Holiday', "Rented Bike Count"], color="dodgerblue", label="No Holiday'", hist_kws={'alpha':.7}, kde_kws={'linewidth':3})
	sns.distplot(data.loc[data['Holiday'] == 'Holiday', "Rented Bike Count"], color="orange", label="Holiday", hist_kws={'alpha':.7}, kde_kws={'linewidth':3})
	plt.ylim(0, 0.004)

	# Decoration
	plt.title('Density Plot of Rented Bike Count by Holiday', fontsize=22)
	plt.legend()
	plt.show()
	# Draw Plot
	plt.figure(figsize=(16,10), dpi= 80)
	sns.kdeplot(data.loc[data['Seasons'] == 'Winter', "Rented Bike Count"], shade=True, color="g", label="Winter", alpha=.7)
	sns.kdeplot(data.loc[data['Seasons'] == 'Spring', "Rented Bike Count"], shade=True, color="deeppink", label="Spring", alpha=.7)
	sns.kdeplot(data.loc[data['Seasons'] == 'Summer', "Rented Bike Count"], shade=True, color="dodgerblue", label="Summer", alpha=.7)
	sns.kdeplot(data.loc[data['Seasons'] == 'Autumn', "Rented Bike Count"], shade=True, color="orange", label="Autumn", alpha=.7)

	# Decoration
	plt.title('Density Plot of City Mileage by n_Cylinders', fontsize=22)
	plt.legend()
	df = data[['Rented Bike Count', 'Seasons']].groupby('Seasons').apply(lambda x: x.mean())
	df.sort_values('Rented Bike Count', inplace=True)
	df.reset_index(inplace=True)

	# Draw plot
	fig, ax = plt.subplots(figsize=(16,10), dpi= 80)
	ax.vlines(x=df.index, ymin=0, ymax=df['Rented Bike Count'], color='firebrick', alpha=0.7, linewidth=2)
	ax.scatter(x=df.index, y=df['Rented Bike Count'], s=80, color='firebrick', alpha=0.7)

	# Title, Label, Ticks and Ylim
	# Prepare Data
	data_df = pd.read_csv(DATA_DIR, encoding= 'unicode_escape', parse_dates=['Date']).head(100)
	x = np.arange(data_df.shape[0])
	y_returns = (data_df['Rented Bike Count'].diff().fillna(0)/data_df['Rented Bike Count'].shift(1)).fillna(0) * 100

	# Plot
	plt.figure(figsize=(16,10), dpi= 80)
	plt.fill_between(x[1:], y_returns[1:], 0, where=y_returns[1:] >= 0, facecolor='green', interpolate=True, alpha=0.7)
	plt.fill_between(x[1:], y_returns[1:], 0, where=y_returns[1:] <= 0, facecolor='red', interpolate=True, alpha=0.7)
	# Plot
	plt.figure(figsize=(10,8), dpi= 80)
	sns.pairplot(data[["Rented Bike Count","Temperature(°C)","Seasons","Wind speed (m/s)"]], kind="scatter", hue="Seasons", plot_kws=dict(s=80, edgecolor="white", linewidth=2.5))
	plt.show()
	# As many colors as there are unique midwest['category']
	categories = np.unique(data['Seasons'])
	colors = [plt.cm.tab10(i/float(len(categories)-1)) for i in range(len(categories))]

	# Step 2: Draw Scatterplot with unique color for each category
	fig = plt.figure(figsize=(16, 10), dpi= 80, facecolor='w', edgecolor='k')

	for i, category in enumerate(categories):
	plt.scatter('Temperature(°C)', 'Rented Bike Count', data=data.loc[data.Seasons==category, :], s=20, c=colors[i], label=str(category), edgecolors='black', linewidths=.5)