Yizhao Tan dottyz

## story_bike_share_analyze_17.py
# Scan x in order; at the first value above knee_start, look for a second
# knee on the remaining part of the fitted curve and stop.
for idx, val in enumerate(x):
    if not val > knee_start:
        continue

    # Only the samples from this position onward are searched.
    kneedle = KneeLocator(
        x=x[idx:],
        y=y_fit[idx:],
        curve='concave',
        direction='increasing',
    )
    knee_end = kneedle.knee
    print(knee_end)
    break

## story_bike_share_analyze_16.py
def logistic_growth(x, k, x0):
    """Evaluate the logistic curve 1 / (1 + exp(-k * (x - x0))).

    Args:
        x: Point(s) at which to evaluate the curve.
        k: Steepness of the curve.
        x0: Midpoint; the curve equals 0.5 where x == x0.

    Returns:
        The curve value(s) at x.
    """
    exponent = -k * (x - x0)
    return 1 / (1 + np.exp(exponent))

# Running share of casual trips: the cumulative sum divided by the overall
# total, so the last row reaches 1.
casual_trips = data['Casual Trips']
data['Cummulative Trips'] = casual_trips.cumsum()
data['Percentage Trips'] = data['Cummulative Trips'] / casual_trips.sum()

# Fit the logistic curve to the share of trips as a function of temperature.
x = data['Mean Temp'].values
y = data['Percentage Trips'].values

popt, pcov = curve_fit(
    logistic_growth,
    x,
    y,
    maxfev=2000,
)

## story_bike_share_analyze_15.py
# Fit the two-parameter exponential to the (x, y) samples.
popt, pcov = curve_fit(
    exp_growth_no_shift,
    x,
    y,
    maxfev=2000,
)

# Locate the first knee on the fitted (convex, increasing) curve.
kneedle = KneeLocator(
    x=x,
    y=exp_growth_no_shift(x, *popt),
    curve='convex',
    direction='increasing',
)
knee_start = kneedle.knee

print(knee_start)

## story_bike_share_analyze_14.py
from kneed import KneeLocator
from scipy.optimize import curve_fit

# Define the curve fitting equations
def linear(x, m, b):
    """Evaluate the straight line m * x + b (slope m, intercept b) at x."""
    scaled = m * x
    return scaled + b
def exp_growth_no_shift(x, a, b):
    """Evaluate a * exp(-b * x).

    Args:
        x: Point(s) at which to evaluate the curve.
        a: Scale factor; the value of the curve at x == 0.
        b: Rate; the exponent is -b * x, so a negative b gives growth in x.

    Returns:
        The curve value(s) at x.
    """
    exponential = np.exp(-b * x)
    return a * exponential
def exp_growth(x, a, b, c):
    """Evaluate a * exp(-b * x) + c.

    Args:
        x: Point(s) at which to evaluate the curve.
        a: Scale factor applied to the exponential term.
        b: Rate; the exponent is -b * x, so a negative b gives growth in x.
        c: Constant vertical offset added to the curve.

    Returns:
        The curve value(s) at x.
    """
    exponential = np.exp(-b * x)
    return a * exponential + c

## story_bike_share_analyze_13.py
# Order the rows by temperature, drop rows with missing values and renumber
# the index from zero.
data = data.sort_values('Mean Temp')
data = data.dropna()
data = data.reset_index(drop=True)

# Casual trips against mean temperature; no ax is passed, so seaborn draws on
# the current axes.
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(
    data=data,
    x='Mean Temp',
    y='Casual Trips',
)

## story_bike_share_analyze_12.py
fig, ax = plt.subplots(figsize=(16, 9))

# A second y-axis on the same x-axis lets ridership and temperature share
# one graph.
ax2 = ax.twinx()
# Default seaborn colour palette.
palette = sns.color_palette()

# Trip counts on the primary axis, one palette colour per user type.
trip_columns = ['Casual Trips', 'Member Trips']
for i, user_type in enumerate(trip_columns):
    sns.lineplot(
        data=data,
        x='Date',
        y=user_type,
        ax=ax,
        color=palette[i],
        markers='',
    )

# Mean temperature on the twin axis, in the third palette colour.
sns.pointplot(
    data=data,
    x='Date',
    y='Mean Temp',
    ax=ax2,
    color=palette[2],
    markers='x',
)

## story_bike_share_analyze_11.py
# Import the weather data. header=22 makes pandas take the column names from
# row index 22 and discard the lines before it (the weather station
# description).
weather = pd.read_csv('./data/weather.csv', header=22)

# Remove the parenthesised units (e.g. Celsius, mm) from the column names and
# rename 'Date/Time' to 'Date'.
unit_suffix = re.compile(r'\([^()]*\)')
weather.columns = [
    'Date' if col == 'Date/Time' else unit_suffix.sub('', col).strip()
    for col in weather.columns
]

# Count distinct trip ids per date and user type, then spread the user types
# into columns.
daily_counts = df.groupby(['Date', 'User Type'])['Id'].nunique().to_frame()
data = daily_counts.pivot_table(index='Date', columns='User Type').reset_index()
# NOTE(review): assumes the pivot yields exactly two user-type columns, in
# casual-then-member order -- confirm against the values of 'User Type'.
data.columns = ['Date', 'Casual Trips', 'Member Trips']

# Keep only the dates present in both the trip counts and the weather data.
weather_subset = weather[['Date', 'Mean Temp', 'Total Precip']]
data = data.merge(weather_subset, on='Date', how='inner')

## story_bike_share_analyze_10.py
# Distinct trip ids per (date, hour, user type), then averaged over dates to
# get a mean trip count for each hour and user type.
hourly_counts = df.groupby(['Date', 'Hour', 'User Type'])['Id'].nunique()
data = hourly_counts.groupby(['Hour', 'User Type']).mean().reset_index()

fig, ax = plt.subplots(figsize=(16, 9))
sns.barplot(
    data=data,
    x='Hour',
    y='Id',
    hue='User Type',
    ax=ax,
)
ax.set_ylabel('Average Hourly Trips')

## story_bike_share_analyze_9.py
# One row of three panels; each panel is paired with one distinct 'Month'
# value from the rows of `ridership` where 'Quarter' equals 3.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
axes = np.array(axes).flatten()
# zip stops at the shorter input, so at most three months are drawn.
for m, ax in zip(ridership[ridership['Quarter']==3]['Month'].unique(), axes):
    ax.set_title(m)
    # Same fixed y-range on every panel.
    ax.set_ylim(0, 7000)
    ax.set_ylabel('Average Daily Trips')

    # NOTE(review): this call is cut off in the excerpt; its remaining
    # arguments and closing parenthesis are not visible here.
    sns.barplot(
        x='Day of Week',
        y='Id',

## story_bike_share_analyze_8.py
# Month-range titles, one per quarter; quarter q uses entry q - 1.
quarter_names = ['Jan. - Mar.', 'Apr. - Jun.', 'Jul. - Sept.', 'Oct. - Dec.']

# 2x2 grid of panels, flattened to 1-D so it can be walked in a single loop.
fig, axes = plt.subplots(2, 2, figsize=(15, 15))
axes = np.array(axes).flatten()

# Pair each quarter (ascending) with one panel and title it.
quarters = sorted(ridership['Quarter'].unique())
for q, ax in zip(quarters, axes):
    ax.set_title(quarter_names[q - 1])
	# Scan x in order; at the first value above knee_start, look for a second
	# knee on the remaining part of the fitted curve and stop.
	# The for/if nesting had been flattened to one level and is restored here.
	for idx, val in enumerate(x):
	    if val > knee_start:
	        # Only the samples from this position onward are searched.
	        kneedle = KneeLocator(x=x[idx:], y=y_fit[idx:], curve='concave', direction='increasing')
	        knee_end = kneedle.knee

	        print(knee_end)
	        break
	def logistic_growth(x, k, x0):
	    """Evaluate the logistic curve 1 / (1 + exp(-k * (x - x0))).

	    Args:
	        x: Point(s) at which to evaluate the curve.
	        k: Steepness of the curve.
	        x0: Midpoint; the curve equals 0.5 where x == x0.

	    Returns:
	        The curve value(s) at x.
	    """
	    return 1 / (1 + np.exp(-k*(x - x0)))

	# Running share of casual trips: the cumulative sum divided by the overall
	# total, so the last row reaches 1.
	casual_trips = data['Casual Trips']
	data['Cummulative Trips'] = casual_trips.cumsum()
	data['Percentage Trips'] = data['Cummulative Trips'] / casual_trips.sum()

	# Fit the logistic curve to the share of trips as a function of temperature.
	x = data['Mean Temp'].values
	y = data['Percentage Trips'].values

	popt, pcov = curve_fit(
	    logistic_growth,
	    x,
	    y,
	    maxfev=2000,
	)
	# Fit the two-parameter exponential to the (x, y) samples.
	popt, pcov = curve_fit(
	    exp_growth_no_shift,
	    x,
	    y,
	    maxfev=2000,
	)

	# Locate the first knee on the fitted (convex, increasing) curve.
	kneedle = KneeLocator(
	    x=x,
	    y=exp_growth_no_shift(x, *popt),
	    curve='convex',
	    direction='increasing',
	)
	knee_start = kneedle.knee

	print(knee_start)
	from kneed import KneeLocator
	from scipy.optimize import curve_fit

	# Define the curve fitting equations
	def linear(x, m, b):
	    """Evaluate the straight line m * x + b (slope m, intercept b) at x."""
	    return m*x + b
	def exp_growth_no_shift(x, a, b):
	    """Evaluate a * exp(-b * x).

	    Args:
	        x: Point(s) at which to evaluate the curve.
	        a: Scale factor; the value of the curve at x == 0.
	        b: Rate; the exponent is -b * x, so a negative b gives growth in x.

	    Returns:
	        The curve value(s) at x.
	    """
	    return a * np.exp(-b * x)
	def exp_growth(x, a, b, c):
	    """Evaluate a * exp(-b * x) + c.

	    Args:
	        x: Point(s) at which to evaluate the curve.
	        a: Scale factor applied to the exponential term.
	        b: Rate; the exponent is -b * x, so a negative b gives growth in x.
	        c: Constant vertical offset added to the curve.

	    Returns:
	        The curve value(s) at x.
	    """
	    return a * np.exp(-b * x) + c
	# Order the rows by temperature, drop rows with missing values and renumber
	# the index from zero.
	data = data.sort_values('Mean Temp')
	data = data.dropna()
	data = data.reset_index(drop=True)

	# Casual trips against mean temperature; no ax is passed, so seaborn draws
	# on the current axes.
	fig, ax = plt.subplots(figsize=(20, 10))
	sns.scatterplot(
	    data=data,
	    x='Mean Temp',
	    y='Casual Trips',
	)
	fig, ax = plt.subplots(figsize=(16, 9))

	# A second y-axis on the same x-axis lets ridership and temperature share
	# one graph.
	ax2 = ax.twinx()
	# Default seaborn colour palette.
	palette = sns.color_palette()

	# Trip counts on the primary axis, one palette colour per user type.
	# The loop body had lost its indentation and is nested again here.
	for i, user_type in enumerate(['Casual Trips', 'Member Trips']):
	    sns.lineplot(x='Date', y=user_type, data=data, ax=ax, color=palette[i], markers='')

	# Mean temperature on the twin axis, in the third palette colour.
	sns.pointplot(x='Date', y='Mean Temp', data=data, ax=ax2, color=palette[2], markers='x')
	# Import the weather data. header=22 makes pandas take the column names
	# from row index 22 and discard the lines before it (the weather station
	# description).
	weather = pd.read_csv('./data/weather.csv', header=22)

	# Remove the parenthesised units (e.g. Celsius, mm) from the column names
	# and rename 'Date/Time' to 'Date'.
	unit_suffix = re.compile(r'\([^()]*\)')
	weather.columns = [
	    'Date' if col == 'Date/Time' else unit_suffix.sub('', col).strip()
	    for col in weather.columns
	]

	# Count distinct trip ids per date and user type, then spread the user
	# types into columns.
	daily_counts = df.groupby(['Date', 'User Type'])['Id'].nunique().to_frame()
	data = daily_counts.pivot_table(index='Date', columns='User Type').reset_index()
	# NOTE(review): assumes the pivot yields exactly two user-type columns, in
	# casual-then-member order -- confirm against the values of 'User Type'.
	data.columns = ['Date', 'Casual Trips', 'Member Trips']

	# Keep only the dates present in both the trip counts and the weather data.
	weather_subset = weather[['Date', 'Mean Temp', 'Total Precip']]
	data = data.merge(weather_subset, on='Date', how='inner')
	# Distinct trip ids per (date, hour, user type), then averaged over dates
	# to get a mean trip count for each hour and user type.
	hourly_counts = df.groupby(['Date', 'Hour', 'User Type'])['Id'].nunique()
	data = hourly_counts.groupby(['Hour', 'User Type']).mean().reset_index()

	fig, ax = plt.subplots(figsize=(16, 9))
	sns.barplot(
	    data=data,
	    x='Hour',
	    y='Id',
	    hue='User Type',
	    ax=ax,
	)
	ax.set_ylabel('Average Hourly Trips')
	# One row of three panels; each panel is paired with one distinct 'Month'
	# value from the rows of `ridership` where 'Quarter' equals 3.
	# NOTE(review): the loop body below has lost its indentation in this copy,
	# and the barplot call is cut off, so this fragment does not parse as-is.
	fig, axes = plt.subplots(1, 3, figsize=(18, 6))
	axes = np.array(axes).flatten()
	# zip stops at the shorter input, so at most three months are drawn.
	for m, ax in zip(ridership[ridership['Quarter']==3]['Month'].unique(), axes):
	ax.set_title(m)
	# Same fixed y-range on every panel.
	ax.set_ylim(0, 7000)
	ax.set_ylabel('Average Daily Trips')

	# NOTE(review): remaining arguments and the closing parenthesis of this
	# call are not visible here.
	sns.barplot(
	x='Day of Week',
	y='Id',
	# 2x2 grid of panels, one per quarter.
	fig, axes = plt.subplots(2, 2, figsize=(15, 15))

	# Flatten the 2D axes array for ease of looping
	axes = np.array(axes).flatten()

	# Prepare the month description titles for each quarter; quarter q uses
	# entry q - 1.
	quarter_names = ['Jan. - Mar.', 'Apr. - Jun.', 'Jul. - Sept.', 'Oct. - Dec.']

	# Pair each quarter (ascending) with one panel and title it. The loop body
	# had lost its indentation and is nested again here.
	for q, ax in zip(sorted(ridership['Quarter'].unique()), axes):
	    ax.set_title(quarter_names[(q-1)])