Skip to content

Instantly share code, notes, and snippets.

# Estimate the second knee point: restart the knee search at the first sample
# whose x value lies beyond the first knee (knee_start) and look for a concave,
# increasing knee on the remainder of the fitted curve.
# NOTE(review): y_fit is not defined in this snippet; presumably it holds the
# fitted curve evaluated at x -- confirm against the surrounding notebook.
for idx, val in enumerate(x):
    if val > knee_start:
        kneedle = KneeLocator(x=x[idx:], y=y_fit[idx:], curve='concave', direction='increasing')
        knee_end = kneedle.knee
        print(knee_end)
        # Only the first sample past knee_start is needed as the new origin
        break
else:
    # No sample lies beyond knee_start: bind knee_end explicitly so later code
    # does not hit a NameError (or silently reuse a stale value).
    knee_end = None
def logistic_growth(x, k, x0):
    """Evaluate a logistic curve that saturates at 1.

    Computes ``1 / (1 + exp(-k * (x - x0)))``.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the curve.
        k: Steepness of the curve; for positive ``k`` the curve rises with ``x``.
        x0: Midpoint of the curve; the result is exactly 0.5 at ``x == x0``.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    return 1 / (1 + np.exp(-k * (x - x0)))
# Build the response variable for the fit: the running total of casual trips
# (accumulated in the frame's current row order) expressed as a fraction of
# the overall casual-trip total.
data['Cummulative Trips'] = data['Casual Trips'].cumsum()
data['Percentage Trips'] = data['Cummulative Trips'].div(data['Casual Trips'].sum())

# Fit the logistic curve to (mean temperature, cumulative share of trips).
# maxfev raises the cap on function evaluations allowed to the optimiser.
x, y = data['Mean Temp'].values, data['Percentage Trips'].values
popt, pcov = curve_fit(f=logistic_growth, xdata=x, ydata=y, maxfev=2000)
# Fit the un-shifted exponential model to the same (x, y) samples
popt, pcov = curve_fit(f=exp_growth_no_shift, xdata=x, ydata=y, maxfev=2000)

# Estimate the first knee point: search the fitted (smooth) curve, rather than
# the raw samples, for a convex, increasing knee.
kneedle = KneeLocator(
    x=x,
    y=exp_growth_no_shift(x, *popt),
    curve='convex',
    direction='increasing',
)
knee_start = kneedle.knee
print(knee_start)
from kneed import KneeLocator
from scipy.optimize import curve_fit
# Define the curve fitting equations
def linear(x, m, b):
    """Evaluate the straight line ``m * x + b``.

    Args:
        x: Scalar or array of points at which to evaluate the line.
        m: Slope.
        b: Intercept (the value at ``x == 0``).

    Returns:
        The line value(s) at ``x``.
    """
    return m * x + b
def exp_growth_no_shift(x, a, b):
    """Evaluate the exponential ``a * exp(-b * x)`` with no vertical offset.

    Note the minus sign in the exponent: the curve grows with ``x`` only when
    ``b`` is negative, and decays when ``b`` is positive.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the curve.
        a: Scale factor (the value at ``x == 0``).
        b: Rate; it is negated inside the exponent.

    Returns:
        The curve value(s) at ``x``.
    """
    return a * np.exp(-b * x)
def exp_growth(x, a, b, c):
    """Evaluate the shifted exponential ``a * exp(-b * x) + c``.

    Note the minus sign in the exponent: the exponential term grows with ``x``
    only when ``b`` is negative.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the curve.
        a: Scale factor of the exponential term.
        b: Rate; it is negated inside the exponent.
        c: Constant vertical offset added to the exponential term.

    Returns:
        The curve value(s) at ``x``.
    """
    return a * np.exp(-b * x) + c
# Order the rows by temperature, discard rows with any missing value and
# renumber the index from zero.
data = (
    data.sort_values(by='Mean Temp')
    .dropna()
    .reset_index(drop=True)
)

# Scatter of casual trips against mean temperature. No ax argument is passed,
# so seaborn draws on the current axes (the one just created here).
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(data=data, x='Mean Temp', y='Casual Trips')
# Plot ridership per user type and mean temperature against the date on a
# single figure, using two y-axes that share the same x-axis.
fig, ax = plt.subplots(figsize=(16, 9))
ax2 = ax.twinx()  # Create the twin axis to enable display of ridership and temperature on the same graph
palette = sns.color_palette()  # Get the default color palette

# One ridership line per user type on the primary axis, each in its own colour
for i, user_type in enumerate(['Casual Trips', 'Member Trips']):
    sns.lineplot(x='Date', y=user_type, data=data, ax=ax, color=palette[i], markers='')

# Temperature goes on the twin axis in the third palette colour. It does not
# depend on the loop variables, so it is drawn once, after the loop.
sns.pointplot(x='Date', y='Mean Temp', data=data, ax=ax2, color=palette[2], markers='x')
# Import the weather data. header=22 tells pandas to take the column names from
# row 22 of the file and to skip the rows before it (containing descriptions of
# the weather station).
weather = pd.read_csv('./data/weather.csv', header=22)

# Remove units contained in the column names (eg. Celsius, mm, etc.): strip any
# parenthesised text and surrounding whitespace. The 'Date/Time' column is
# special-cased and simply renamed to 'Date'.
weather.columns = [
    'Date' if col == 'Date/Time' else re.sub(r'\([^()]*\)', '', col).strip()
    for col in weather.columns
]
# Count distinct trip Ids per (Date, User Type), then pivot so that each user
# type becomes its own column with one row per date.
data = (
    df.groupby(['Date', 'User Type'])['Id']
    .nunique()
    .to_frame()
    .pivot_table(index='Date', columns='User Type')
    .reset_index()
)

# Flatten the pivoted column labels into plain names.
# NOTE(review): this assumes the pivot yields exactly two user-type columns, in
# the order casual then member -- confirm against the values in 'User Type'.
data.columns = ['Date', 'Casual Trips', 'Member Trips']

# Attach the daily weather readings; the inner join keeps only dates present
# in both frames.
data = data.merge(
    weather[['Date', 'Mean Temp', 'Total Precip']],
    how='inner',
    on='Date',
)
# Two-stage aggregation: first count distinct trip Ids for every
# (Date, Hour, User Type), then average those counts over the dates to get
# the mean number of trips per hour for each user type.
data = (
    df.groupby(['Date', 'Hour', 'User Type'])['Id']
    .nunique()
    .groupby(['Hour', 'User Type'])
    .mean()
    .reset_index()
)

# Grouped bars: one group per hour, one bar per user type
fig, ax = plt.subplots(figsize=(16, 9))
sns.barplot(data=data, x='Hour', y='Id', hue='User Type', ax=ax)
ax.set_ylabel('Average Hourly Trips')
# Draw one bar-chart panel per month for the rows where Quarter == 3, laid out
# side by side in a single row of three axes.
# NOTE(review): this snippet is truncated -- the loop body has lost its
# indentation and the sns.barplot( call below is never closed; the remaining
# arguments are not visible here.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
# Flatten the axes array so it can be zipped with the month values
axes = np.array(axes).flatten()
# Pair each distinct Month value found in the Quarter == 3 rows with one axis
for m, ax in zip(ridership[ridership['Quarter']==3]['Month'].unique(), axes):
ax.set_title(m)
# Fixed y-range so the panels are directly comparable
ax.set_ylim(0, 7000)
ax.set_ylabel('Average Daily Trips')
sns.barplot(
x='Day of Week',
y='Id',
# Set up a 2x2 grid of panels, one per quarter found in ridership['Quarter'].
# NOTE(review): the loop body has lost its indentation and may continue beyond
# the last line visible here.
fig, axes = plt.subplots(2, 2, figsize=(15, 15))
# Flatten the 2D axes array for ease of looping
axes = np.array(axes).flatten()
# Prepare the month description titles for each quarter
quarter_names = ['Jan. - Mar.', 'Apr. - Jun.', 'Jul. - Sept.', 'Oct. - Dec.']
# Pair each distinct Quarter value, in ascending order, with one panel
for q, ax in zip(sorted(ridership['Quarter'].unique()), axes):
# q - 1 is used as the index into quarter_names (so quarter 1 maps to the first title)
ax.set_title(quarter_names[(q-1)])