import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from matplotlib import rcParams


def process_stock_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* with a numeric ``Close`` column.

    The input frame is left untouched. The columns ``Open``, ``High``,
    ``Low``, ``Volume``, ``Dividends`` and ``Stock Splits`` are removed when
    present, ``Close`` is coerced to numeric (unparseable values become NaN),
    and rows whose ``Close`` is NaN are dropped.

    Args:
        df: Frame that must contain a ``Close`` column; any other columns
            not listed above are passed through unchanged.

    Returns:
        A new DataFrame with the listed columns removed and no missing
        ``Close`` values.

    Raises:
        KeyError: If *df* has no ``Close`` column.
    """
    cols_to_drop = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']

    # errors='ignore' lets the function accept frames that lack some of the
    # optional columns instead of failing with a KeyError.
    # drop() returns a new frame, so the caller's data is never modified.
    processed_df = df.drop(columns=cols_to_drop, errors='ignore')

    # Coerce 'Close' to numeric; values that cannot be parsed become NaN.
    processed_df = processed_df.assign(
        Close=pd.to_numeric(processed_df['Close'], errors='coerce')
    )

    # Discard the rows whose 'Close' is missing or could not be parsed.
    return processed_df.dropna(subset=['Close'])


# NOTE(review): `df` is not defined in this part of the file; it is expected
# to have been created earlier (e.g. in a previous notebook cell).
filtered_dataframe = process_stock_data(df)

# Collapse rows sharing the same 'Date' into one row by summing 'Close'.
filtered_dataframe = filtered_dataframe.groupby('Date')['Close'].sum().reset_index()
filtered_dataframe.head()  # preview; has no effect outside an interactive session

# Data decomposition
rcParams['figure.figsize'] = 18, 8

# Convert 'Date' column to datetime if not already in datetime format
filtered_dataframe['Date'] = pd.to_datetime(filtered_dataframe['Date'])

# Set 'Date' column as the index
filtered_dataframe.set_index('Date', inplace=True)

# groupby ordered the keys *before* the datetime conversion, so string dates
# in a non-ISO format would be in lexicographic rather than chronological
# order. The decomposition relies on row order, so sort explicitly.
filtered_dataframe.sort_index(inplace=True)

# Perform an additive seasonal decomposition.
# `period` is the number of observations per seasonal cycle: 30 means every
# 30 consecutive rows form one cycle.
# NOTE(review): this approximates a monthly cycle only if there is one row per
# calendar day — confirm against the data's actual frequency.
decomposition = sm.tsa.seasonal_decompose(
    filtered_dataframe['Close'], model='additive', period=30
)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# Plot the original series and its three components, one per row.
plt.figure(figsize=(18, 8))
panels = [
    (filtered_dataframe['Close'], 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonal'),
    (residual, 'Residual'),
]
for panel_row, (panel_series, panel_label) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, panel_row)
    plt.plot(filtered_dataframe.index, panel_series, label=panel_label)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()