import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm


def process_stock_data(df):
    """Return a cleaned copy of *df* without the unused price/volume columns.

    The columns 'Open', 'High', 'Low', 'Volume', 'Dividends' and
    'Stock Splits' are removed when present, 'Close' is converted to a
    numeric dtype, and rows whose 'Close' value cannot be parsed as a
    number (or is missing) are dropped. Every other column is passed
    through untouched.

    Args:
        df: DataFrame that contains at least a 'Close' column.

    Returns:
        A new DataFrame; the input frame is not modified.

    Raises:
        KeyError: If *df* has no 'Close' column.
    """
    # Columns that are not needed for the closing-price analysis
    cols_to_drop = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']

    # errors='ignore' skips labels that are absent, so frames lacking e.g.
    # 'Dividends' or 'Stock Splits' no longer fail with a KeyError.
    # The explicit copy guarantees the caller's frame is never written to.
    processed_df = df.drop(columns=cols_to_drop, errors='ignore').copy()

    # Values that cannot be parsed as numbers become NaN ...
    processed_df['Close'] = pd.to_numeric(processed_df['Close'], errors='coerce')

    # ... and are removed here together with values that were already missing
    return processed_df.dropna(subset=['Close'])



# NOTE(review): `df` is not defined anywhere in the visible part of this file;
# it must already hold the raw price data (with 'Date' and 'Close' columns)
# before this point is reached.
filtered_dataframe = process_stock_data(df)

# Collapse to one row per date by summing the 'Close' values that share a date
filtered_dataframe = filtered_dataframe.groupby('Date')['Close'].sum().reset_index()

# Data decomposition
# Default figure size for all figures created from here on. `rcParams` is the
# same object pylab re-exports; it is taken from matplotlib directly because
# the pylab interface is discouraged.
from matplotlib import rcParams
rcParams['figure.figsize'] = 18, 8

# Convert 'Date' column to datetime if not already in datetime format
filtered_dataframe['Date'] = pd.to_datetime(filtered_dataframe['Date'])

# Set 'Date' column as the index
filtered_dataframe.set_index('Date', inplace=True)

# The grouping above ran on the raw 'Date' values, so string dates in a
# non-ISO format would have been ordered lexicographically. The decomposition
# needs the observations in chronological order, so sort explicitly.
filtered_dataframe.sort_index(inplace=True)

# Number of consecutive observations treated as one seasonal cycle.
# NOTE(review): the original comment said "Assuming monthly data"; 30 is
# presumably meant as "about one month of daily rows" - confirm against the
# actual sampling frequency of the data.
SEASONAL_PERIOD = 30

# Now perform seasonal decomposition
decomposition = sm.tsa.seasonal_decompose(
    filtered_dataframe['Close'],
    model='additive',
    period=SEASONAL_PERIOD,
)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# Plot the decomposed series: one stacked panel per component
panels = [
    (filtered_dataframe['Close'], 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonal'),
    (residual, 'Residual'),
]

plt.figure(figsize=(18, 8))
for position, (series, label) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, position)
    plt.plot(filtered_dataframe.index, series, label=label)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()