Created
April 11, 2025 05:12
-
-
Save innopreneur/3beeeb2ff6d0cb9821cfe5d24f884163 to your computer and use it in GitHub Desktop.
This is a starter notebook for the Conch Republic Coral Quest. It is designed to help you kickstart your quest.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Coral Quest - Starter Notebook
"""
ABOUT THIS NOTEBOOK
This is a **starter notebook** for the Conch Republic Coral Quest. It is designed to help you get up and running with:
- Loading and exploring the CREMP coral reef dataset
- Visualizing trends over time and by site
- Building simple forecasting and regression models
Feel free to **modify, extend, and improve** this notebook based on your own ideas and analytical approach.
Some things you might want to change or add:
- Use more features (e.g. bleaching, depth, pollution, etc.)
- Expand to multiple reef sites
- Try clustering, time-lagged regression, or other ML models
- Refine evaluation metrics
Good luck, and don't forget to tell a story with your data!
"""
# 1. Import Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Use seaborn's "whitegrid" style for the plots that follow.
sns.set(style="whitegrid")
# 2. Load Dataset
# TODO: Replace this with actual dataset file path or URL
DATA_PATH = "coral_data.csv"  # placeholder filename
data = pd.read_csv(DATA_PATH)

# 3. Preview Data
# A single print with a newline separator emits the heading and the first
# rows exactly as two consecutive print calls would.
print("\nFirst few rows of the dataset:", data.head(), sep="\n")
print("\nColumns:", data.columns)

# 4. Basic Cleanup
# Optional: Handle missing values, data types, column renaming if needed
# For example:
# data = data.dropna()
# 5. Trend Over Time (Coral Cover)
# Draw one coral-cover line per value of the 'Site' column on an explicitly
# created axes instead of relying on pyplot's implicit current figure.
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=data, x='Year', y='Stony_Coral_Cover', hue='Site', ax=trend_ax)
trend_ax.set_title('Stony Coral Cover Over Time by Site')
trend_ax.set_ylabel('Stony Coral Cover (%)')
trend_ax.set_xlabel('Year')
trend_ax.legend(title='Reef Site')
trend_fig.tight_layout()
plt.show()
# 6. Correlation Heatmap
# Pairwise correlations between coral cover, species richness and temperature.
corr_columns = ["Stony_Coral_Cover", "Species_Richness", "Temperature"]
corr = data[corr_columns].corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Matrix')
heatmap_fig.tight_layout()
plt.show()
# 7. Forecasting with Prophet (Single Site Example)
# Prepare data for Prophet (must have 'ds' and 'y' columns).
# The example uses whichever site appears first in the 'Site' column.
first_site = data['Site'].unique()[0]
site_data = data[data['Site'] == first_site]
# .copy() gives prophet_df its own data, so the 'ds' assignment below is not
# a write into a slice of `data`.
prophet_df = (
    site_data[['Year', 'Stony_Coral_Cover']]
    .rename(columns={'Year': 'ds', 'Stony_Coral_Cover': 'y'})
    .copy()
)
# Parsing with format='%Y' places each observation on 1 January of its year
# (assumes 'Year' holds four-digit years -- confirm against the dataset).
prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
model = Prophet()
model.fit(prophet_df)
# Create future dataframe: five more yearly steps. 'YS' (year start) keeps the
# future timestamps on 1 January like the history; 'Y' would yield year-end
# dates (31 December) and is a deprecated alias in recent pandas releases.
future = model.make_future_dataframe(periods=5, freq='YS')
forecast = model.predict(future)
# Plot forecast, labelled with the site that was actually modelled.
model.plot(forecast)
plt.title(f'Forecast of Coral Cover for {first_site} (Prophet)')
plt.show()
# 8. Regression Model: Random Forest
# Select features and target.
# The cleanup step earlier in the script is optional, so rows with a missing
# value in any modelling column are dropped here; a NaN target makes
# fit() / mean_squared_error raise.
model_df = data[["Year", "Temperature", "Stony_Coral_Cover"]].dropna()
features = model_df[["Year", "Temperature"]]
target = model_df["Stony_Coral_Cover"]
# Split data into train and test sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)
# Initialize and train the model
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
# Predict and evaluate
y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"\nRandom Forest MSE: {mse:.2f}")
# Plot predictions vs true values; the dashed red diagonal marks where
# predicted == actual.
actual_bounds = [y_test.min(), y_test.max()]
plt.figure(figsize=(6, 4))
plt.scatter(y_test, y_pred)
plt.plot(actual_bounds, actual_bounds, 'r--')
plt.xlabel("Actual Coral Cover")
plt.ylabel("Predicted Coral Cover")
plt.title("Random Forest Prediction Accuracy")
plt.tight_layout()
plt.show()
# Notebook complete! Add more sites, models, or pollution variables as needed.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment