import numpy as np
import matplotlib.dates as mdates

# Choose ISM00040180 station and make sure TAVG column is numeric
df = data[data['STATION'] == 'ISM00040180'].copy()
df['TAVG'] = pd.to_numeric(df['TAVG'], errors='coerce')

# Convert DATE to datetime
df['DATE'] = pd.to_datetime(df['DATE'])

# Calculate the 'baseline' for 1951-1980
baseline_df = df[(df['DATE'].dt.year >= 1951) & (df['DATE'].dt.year <= 1980)]
baseline = baseline_df['TAVG'].mean()

# Filter data from 1973 and later
df = df[df['DATE'].dt.year >= 1980]

# Calculate the anomalies
df['Anomaly'] = df['TAVG'] - baseline

# Convert DATE to numeric for calculations
df['DATE_num'] = mdates.date2num(df['DATE'])

# Calculate trend lines
df_max = df.groupby(df['DATE'].dt.year)['Anomaly'].idxmax()
df_min = df.groupby(df['DATE'].dt.year)['Anomaly'].idxmin()

# Top trend
max_coeffs = np.polyfit(df.loc[df_max, 'DATE_num'], df.loc[df_max, 'Anomaly'], 1)
max_trend = max_coeffs[0] * df['DATE_num'] + max_coeffs[1]

# Bottom trend
min_coeffs = np.polyfit(df.loc[df_min, 'DATE_num'], df.loc[df_min, 'Anomaly'], 1)
min_trend = min_coeffs[0] * df['DATE_num'] + min_coeffs[1]

# Plot
plt.figure(figsize=(10, 6))
plt.plot(df['DATE'], df['Anomaly'], label='Temperature Anomaly')
plt.plot(df['DATE'], max_trend, color='orange', linestyle='--', label='Max Trend')
plt.plot(df['DATE'], min_trend, color='green', linestyle='--', label='Min Trend')
plt.axhline(0, color='red', linestyle='--', label='Baseline')  # This adds a horizontal line
plt.xlabel('Date')
plt.ylabel('Temperature Anomaly (°C)')
plt.title('Temperature Anomaly Graph for TAVG for station ISM00040180 (from 1973 onwards)')
plt.legend()
plt.show()