riqbal-k/fmfactors.ipynb

## fmfactors.ipynb
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":6855131,"sourceType":"datasetVersion","datasetId":3940311}],"dockerImageVersionId":30558,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2023-11-10T09:25:23.060869Z","iopub.execute_input":"2023-11-10T09:25:23.062312Z","iopub.status.idle":"2023-11-10T09:25:23.073329Z","shell.execute_reply.started":"2023-11-10T09:25:23.062265Z","shell.execute_reply":"2023-11-10T09:25:23.072151Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom sklearn.ensemble import RandomForestRegressor","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:16:54.914077Z","iopub.execute_input":"2023-11-10T12:16:54.915621Z","iopub.status.idle":"2023-11-10T12:16:59.705939Z","shell.execute_reply.started":"2023-11-10T12:16:54.915495Z","shell.execute_reply":"2023-11-10T12:16:59.704678Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import xgboost as xgb\nfrom xgboost import XGBRegressor\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.svm import SVR","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:06.041818Z","iopub.execute_input":"2023-11-10T12:17:06.042682Z","iopub.status.idle":"2023-11-10T12:17:06.217633Z","shell.execute_reply.started":"2023-11-10T12:17:06.042646Z","shell.execute_reply":"2023-11-10T12:17:06.216510Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom tensorflow import keras\nfrom tensorflow.keras import layers\nimport torch\nimport tensorflow as 
tf","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:12.892539Z","iopub.execute_input":"2023-11-10T12:17:12.892955Z","iopub.status.idle":"2023-11-10T12:17:21.863478Z","shell.execute_reply.started":"2023-11-10T12:17:12.892921Z","shell.execute_reply":"2023-11-10T12:17:21.862219Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Importing the dataset\ndata = pd.read_csv('/kaggle/input/pool23/pool2.csv')","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:21.865359Z","iopub.execute_input":"2023-11-10T12:17:21.866309Z","iopub.status.idle":"2023-11-10T12:17:21.927231Z","shell.execute_reply.started":"2023-11-10T12:17:21.866271Z","shell.execute_reply":"2023-11-10T12:17:21.925959Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data.head()","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:23.953472Z","iopub.execute_input":"2023-11-10T12:17:23.954600Z","iopub.status.idle":"2023-11-10T12:17:23.987854Z","shell.execute_reply.started":"2023-11-10T12:17:23.954559Z","shell.execute_reply":"2023-11-10T12:17:23.987002Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 3-factor model","metadata":{}},{"cell_type":"code","source":"# Create a new variable 'rm-rf' by subtracting 'rf' from 'rm'\ndata['rm-rf'] = data['rm'] - data['rf']\n# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:31.863994Z","iopub.execute_input":"2023-11-10T12:17:31.865356Z","iopub.status.idle":"2023-11-10T12:17:31.879937Z","shell.execute_reply.started":"2023-11-10T12:17:31.865309Z","shell.execute_reply":"2023-11-10T12:17:31.879056Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size=0.15, random_state=42)\n# The validation set is carved out of the remaining 85%, so 0.15 / 0.85 of it equals 15% of the full data set\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15 / 0.85, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:47.898997Z","iopub.execute_input":"2023-11-10T12:17:47.899490Z","iopub.status.idle":"2023-11-10T12:17:47.909695Z","shell.execute_reply.started":"2023-11-10T12:17:47.899432Z","shell.execute_reply":"2023-11-10T12:17:47.908420Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Random Forest model","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:52.488292Z","iopub.execute_input":"2023-11-10T12:17:52.489687Z","iopub.status.idle":"2023-11-10T12:17:52.963711Z","shell.execute_reply.started":"2023-11-10T12:17:52.489630Z","shell.execute_reply":"2023-11-10T12:17:52.962635Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): 
{val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:01.018281Z","iopub.execute_input":"2023-11-10T12:18:01.019589Z","iopub.status.idle":"2023-11-10T12:18:01.046884Z","shell.execute_reply.started":"2023-11-10T12:18:01.019527Z","shell.execute_reply":"2023-11-10T12:18:01.045549Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:05.135231Z","iopub.execute_input":"2023-11-10T12:18:05.136256Z","iopub.status.idle":"2023-11-10T12:18:05.160278Z","shell.execute_reply.started":"2023-11-10T12:18:05.136214Z","shell.execute_reply":"2023-11-10T12:18:05.159061Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# gradient boosting regression tree (GBRT)","metadata":{}},{"cell_type":"code","source":"import pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nimport numpy as 
np","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:14.784485Z","iopub.execute_input":"2023-11-10T12:18:14.784911Z","iopub.status.idle":"2023-11-10T12:18:14.790509Z","shell.execute_reply.started":"2023-11-10T12:18:14.784875Z","shell.execute_reply":"2023-11-10T12:18:14.789512Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:18.978476Z","iopub.execute_input":"2023-11-10T12:18:18.978911Z","iopub.status.idle":"2023-11-10T12:18:19.212039Z","shell.execute_reply.started":"2023-11-10T12:18:18.978875Z","shell.execute_reply":"2023-11-10T12:18:19.211248Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): 
{test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:27.965519Z","iopub.execute_input":"2023-11-10T12:18:27.966041Z","iopub.status.idle":"2023-11-10T12:18:27.980975Z","shell.execute_reply.started":"2023-11-10T12:18:27.966000Z","shell.execute_reply":"2023-11-10T12:18:27.979729Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGBoost (XGB)","metadata":{}},{"cell_type":"code","source":"import xgboost as xgb\nfrom xgboost import XGBRegressor","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:27:56.825898Z","iopub.execute_input":"2023-11-10T09:27:56.827005Z","iopub.status.idle":"2023-11-10T09:27:56.831441Z","shell.execute_reply.started":"2023-11-10T09:27:56.826958Z","shell.execute_reply":"2023-11-10T09:27:56.830597Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): 
{val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:39.566020Z","iopub.execute_input":"2023-11-10T12:18:39.567239Z","iopub.status.idle":"2023-11-10T12:18:39.797528Z","shell.execute_reply.started":"2023-11-10T12:18:39.567198Z","shell.execute_reply":"2023-11-10T12:18:39.796397Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:53.921918Z","iopub.execute_input":"2023-11-10T12:18:53.922350Z","iopub.status.idle":"2023-11-10T12:18:53.935099Z","shell.execute_reply.started":"2023-11-10T12:18:53.922316Z","shell.execute_reply":"2023-11-10T12:18:53.934282Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Support Vector Machine","metadata":{}},{"cell_type":"code","source":"from sklearn.svm import SVR","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:04.831700Z","iopub.execute_input":"2023-11-10T12:19:04.832107Z","iopub.status.idle":"2023-11-10T12:19:04.837889Z","shell.execute_reply.started":"2023-11-10T12:19:04.832066Z","shell.execute_reply":"2023-11-10T12:19:04.835706Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred 
= svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:08.722721Z","iopub.execute_input":"2023-11-10T12:19:08.723610Z","iopub.status.idle":"2023-11-10T12:19:10.196092Z","shell.execute_reply.started":"2023-11-10T12:19:08.723571Z","shell.execute_reply":"2023-11-10T12:19:10.195130Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:16.305538Z","iopub.execute_input":"2023-11-10T12:19:16.305947Z","iopub.status.idle":"2023-11-10T12:19:16.386902Z","shell.execute_reply.started":"2023-11-10T12:19:16.305916Z","shell.execute_reply":"2023-11-10T12:19:16.385609Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# neural networks","metadata":{}},{"cell_type":"markdown","source":"****NN1 (One Hidden Layer with 32 Neurons):","metadata":{}},{"cell_type":"code","source":"# 
Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:35.569580Z","iopub.execute_input":"2023-11-10T12:19:35.570349Z","iopub.status.idle":"2023-11-10T12:19:35.585206Z","shell.execute_reply.started":"2023-11-10T12:19:35.570312Z","shell.execute_reply":"2023-11-10T12:19:35.584072Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"model_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:39.589482Z","iopub.execute_input":"2023-11-10T12:19:39.589901Z","iopub.status.idle":"2023-11-10T12:19:39.776938Z","shell.execute_reply.started":"2023-11-10T12:19:39.589868Z","shell.execute_reply":"2023-11-10T12:19:39.775818Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:46.791486Z","iopub.execute_input":"2023-11-10T12:19:46.791908Z","iopub.status.idle":"2023-11-10T12:20:02.614199Z","shell.execute_reply.started":"2023-11-10T12:19:46.791874Z","shell.execute_reply":"2023-11-10T12:20:02.613235Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN2","metadata":{}},{"cell_type":"code","source":"model_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:05.543593Z","iopub.execute_input":"2023-11-10T09:33:05.544045Z","iopub.status.idle":"2023-11-10T09:33:05.604562Z","shell.execute_reply.started":"2023-11-10T09:33:05.544011Z","shell.execute_reply":"2023-11-10T09:33:05.603194Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN2 (model_nn2, the two-hidden-layer network defined in the previous cell)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:14.871752Z","iopub.execute_input":"2023-11-10T09:33:14.872189Z","iopub.status.idle":"2023-11-10T09:33:31.879123Z","shell.execute_reply.started":"2023-11-10T09:33:14.872148Z","shell.execute_reply":"2023-11-10T09:33:31.878010Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN3","metadata":{}},{"cell_type":"code","source":"model_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:52.170576Z","iopub.execute_input":"2023-11-10T09:33:52.171184Z","iopub.status.idle":"2023-11-10T09:33:52.244431Z","shell.execute_reply.started":"2023-11-10T09:33:52.171147Z","shell.execute_reply":"2023-11-10T09:33:52.243206Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN3 (model_nn3, the three-hidden-layer network defined in the previous cell)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:58.925574Z","iopub.execute_input":"2023-11-10T09:33:58.926019Z","iopub.status.idle":"2023-11-10T09:34:16.876879Z","shell.execute_reply.started":"2023-11-10T09:33:58.925985Z","shell.execute_reply":"2023-11-10T09:34:16.875614Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN4 (Four Hidden Layers with 32, 16, 8, and 4 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:16.878993Z","iopub.execute_input":"2023-11-10T09:34:16.879330Z","iopub.status.idle":"2023-11-10T09:34:16.960631Z","shell.execute_reply.started":"2023-11-10T09:34:16.879301Z","shell.execute_reply":"2023-11-10T09:34:16.959306Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN4 (model_nn4, the four-hidden-layer network defined in the previous cell)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn4)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:22.836877Z","iopub.execute_input":"2023-11-10T09:34:22.837415Z","iopub.status.idle":"2023-11-10T09:34:40.655170Z","shell.execute_reply.started":"2023-11-10T09:34:22.837380Z","shell.execute_reply":"2023-11-10T09:34:40.654114Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN5 (Five Hidden Layers with 32, 16, 8, 4, and 2 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:40.759517Z","iopub.execute_input":"2023-11-10T09:34:40.760203Z","iopub.status.idle":"2023-11-10T09:34:40.847946Z","shell.execute_reply.started":"2023-11-10T09:34:40.760160Z","shell.execute_reply":"2023-11-10T09:34:40.846818Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN5 (model_nn5, the five-hidden-layer network defined in the previous cell)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T19:57:04.688156Z","iopub.execute_input":"2023-11-04T19:57:04.688624Z","iopub.status.idle":"2023-11-04T19:57:21.915022Z","shell.execute_reply.started":"2023-11-04T19:57:04.688588Z","shell.execute_reply":"2023-11-04T19:57:21.913796Z"},"jupyter":{"source_hidden":true},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Plot","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:42.784168Z","iopub.execute_input":"2023-11-10T09:34:42.784565Z","iopub.status.idle":"2023-11-10T09:34:42.790085Z","shell.execute_reply.started":"2023-11-10T09:34:42.784534Z","shell.execute_reply":"2023-11-10T09:34:42.788748Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Get feature importances from the RF model\n# (at this point rf_model is the Random Forest fitted on the three factors listed below)\nfeature_importances = rf_model.feature_importances_\n# Define the names of your factor variables\nfactor_names = ['SMB3', 'HML3', 'rm-rf']\n# Create a DataFrame to store feature importances along with factor names\nimportance_df = pd.DataFrame({'Factor': factor_names, 'Importance': feature_importances})\n\n# Sort the DataFrame by importance values in descending order\nimportance_df = importance_df.sort_values(by='Importance', ascending=False)\n\n# Create a bar plot to visualize feature importances\nplt.figure(figsize=(10, 6))\nplt.barh(importance_df['Factor'], importance_df['Importance'], color='skyblue')\nplt.xlabel('Importance')\nplt.title('Feature Importances for 3-Factor Model')\nplt.gca().invert_yaxis()  # Invert the y-axis to show the most important factors at the 
top\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:46.895496Z","iopub.execute_input":"2023-11-10T09:34:46.895924Z","iopub.status.idle":"2023-11-10T09:34:47.209392Z","shell.execute_reply.started":"2023-11-10T09:34:46.895875Z","shell.execute_reply":"2023-11-10T09:34:47.208110Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 4-Factors Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf', 'UMD']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-04T17:58:23.899568Z","iopub.execute_input":"2023-11-04T17:58:23.899963Z","iopub.status.idle":"2023-11-04T17:58:23.908414Z","shell.execute_reply.started":"2023-11-04T17:58:23.899932Z","shell.execute_reply":"2023-11-04T17:58:23.907250Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf', 'UMD']]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:00:10.576530Z","iopub.execute_input":"2023-11-04T18:00:10.576919Z","iopub.status.idle":"2023-11-04T18:00:10.583889Z","shell.execute_reply.started":"2023-11-04T18:00:10.576890Z","shell.execute_reply":"2023-11-04T18:00:10.582615Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\n# The validation set is carved out of the remaining 85%, so 0.15 / 0.85 of it equals 15% of the full data set\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15 / 0.85, 
random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:00:31.552473Z","iopub.execute_input":"2023-11-04T18:00:31.552910Z","iopub.status.idle":"2023-11-04T18:00:31.563101Z","shell.execute_reply.started":"2023-11-04T18:00:31.552876Z","shell.execute_reply":"2023-11-04T18:00:31.562110Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Random Forest model","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:02:47.792828Z","iopub.execute_input":"2023-11-04T18:02:47.793961Z","iopub.status.idle":"2023-11-04T18:02:48.364205Z","shell.execute_reply.started":"2023-11-04T18:02:47.793914Z","shell.execute_reply":"2023-11-04T18:02:48.363409Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:03:17.445535Z","iopub.execute_input":"2023-11-04T18:03:17.445935Z","iopub.status.idle":"2023-11-04T18:03:17.477398Z","shell.execute_reply.started":"2023-11-04T18:03:17.445906Z","shell.execute_reply":"2023-11-04T18:03:17.476285Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# gradient boosting regression tree (GBRT)","metadata":{}},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:04:24.398085Z","iopub.execute_input":"2023-11-04T18:04:24.398522Z","iopub.status.idle":"2023-11-04T18:04:24.679923Z","shell.execute_reply.started":"2023-11-04T18:04:24.398487Z","shell.execute_reply":"2023-11-04T18:04:24.678587Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = 
np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:05:12.590610Z","iopub.execute_input":"2023-11-04T18:05:12.591015Z","iopub.status.idle":"2023-11-04T18:05:12.605723Z","shell.execute_reply.started":"2023-11-04T18:05:12.590984Z","shell.execute_reply":"2023-11-04T18:05:12.604514Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGBoost (XGB)","metadata":{}},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:07:06.144856Z","iopub.execute_input":"2023-11-04T18:07:06.145265Z","iopub.status.idle":"2023-11-04T18:07:06.370932Z","shell.execute_reply.started":"2023-11-04T18:07:06.145234Z","shell.execute_reply":"2023-11-04T18:07:06.369777Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing 
R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:07:35.560571Z","iopub.execute_input":"2023-11-04T18:07:35.560996Z","iopub.status.idle":"2023-11-04T18:07:35.574935Z","shell.execute_reply.started":"2023-11-04T18:07:35.560960Z","shell.execute_reply":"2023-11-04T18:07:35.573767Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Support Vector Machine","metadata":{}},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:08:45.523060Z","iopub.execute_input":"2023-11-04T18:08:45.523503Z","iopub.status.idle":"2023-11-04T18:08:47.480828Z","shell.execute_reply.started":"2023-11-04T18:08:45.523465Z","shell.execute_reply":"2023-11-04T18:08:47.479626Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, 
evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:09:14.836861Z","iopub.execute_input":"2023-11-04T18:09:14.837247Z","iopub.status.idle":"2023-11-04T18:09:14.923515Z","shell.execute_reply.started":"2023-11-04T18:09:14.837217Z","shell.execute_reply":"2023-11-04T18:09:14.922369Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# neural networks","metadata":{}},{"cell_type":"code","source":"import torch\nimport torch.nn as nn\nimport torch.optim as optim","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:33.861247Z","iopub.execute_input":"2023-11-10T09:30:33.862018Z","iopub.status.idle":"2023-11-10T09:30:33.867089Z","shell.execute_reply.started":"2023-11-10T09:30:33.861980Z","shell.execute_reply":"2023-11-10T09:30:33.865962Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom tensorflow import keras\nfrom tensorflow.keras import 
layers","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:37.499785Z","iopub.execute_input":"2023-11-10T09:30:37.500245Z","iopub.status.idle":"2023-11-10T09:30:47.102763Z","shell.execute_reply.started":"2023-11-10T09:30:37.500210Z","shell.execute_reply":"2023-11-10T09:30:47.101576Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import torch\nimport tensorflow as tf","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:58.131179Z","iopub.execute_input":"2023-11-10T09:30:58.132400Z","iopub.status.idle":"2023-11-10T09:30:58.137137Z","shell.execute_reply.started":"2023-11-10T09:30:58.132363Z","shell.execute_reply":"2023-11-10T09:30:58.135831Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data: 15% is held out for testing, then 15% of the remaining 85% (12.75% overall) for validation, leaving 72.25% for training\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:48:06.951287Z","iopub.execute_input":"2023-11-10T12:48:06.951798Z","iopub.status.idle":"2023-11-10T12:48:06.964177Z","shell.execute_reply.started":"2023-11-10T12:48:06.951762Z","shell.execute_reply":"2023-11-10T12:48:06.963129Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = 
input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T19:06:15.363530Z","iopub.execute_input":"2023-11-04T19:06:15.363928Z","iopub.status.idle":"2023-11-04T19:06:15.370822Z","shell.execute_reply.started":"2023-11-04T19:06:15.363899Z","shell.execute_reply":"2023-11-04T19:06:15.370022Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:48:13.577522Z","iopub.execute_input":"2023-11-10T12:48:13.578405Z","iopub.status.idle":"2023-11-10T12:48:30.750395Z","shell.execute_reply.started":"2023-11-10T12:48:13.578358Z","shell.execute_reply":"2023-11-10T12:48:30.748446Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = 
model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:49:38.179621Z","iopub.execute_input":"2023-11-10T12:49:38.180531Z","iopub.status.idle":"2023-11-10T12:49:54.138309Z","shell.execute_reply.started":"2023-11-10T12:49:38.180492Z","shell.execute_reply":"2023-11-10T12:49:54.137178Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:50:11.276040Z","iopub.execute_input":"2023-11-10T12:50:11.276444Z","iopub.status.idle":"2023-11-10T12:50:27.282860Z","shell.execute_reply.started":"2023-11-10T12:50:11.276412Z","shell.execute_reply":"2023-11-10T12:50:27.281738Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:50:43.786298Z","iopub.execute_input":"2023-11-10T12:50:43.787648Z","iopub.status.idle":"2023-11-10T12:50:59.770791Z","shell.execute_reply.started":"2023-11-10T12:50:43.787600Z","shell.execute_reply":"2023-11-10T12:50:59.769100Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # For regression 
tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:51:17.006100Z","iopub.execute_input":"2023-11-10T12:51:17.007012Z","iopub.status.idle":"2023-11-10T12:51:32.766563Z","shell.execute_reply.started":"2023-11-10T12:51:17.006969Z","shell.execute_reply":"2023-11-10T12:51:32.765368Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 5-Factor Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB', 'HML','rm-rf', 'CMA', 'RMW']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:30:58.105039Z","iopub.execute_input":"2023-11-04T20:30:58.105547Z","iopub.status.idle":"2023-11-04T20:30:58.113001Z","shell.execute_reply.started":"2023-11-04T20:30:58.105509Z","shell.execute_reply":"2023-11-04T20:30:58.111862Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data: 15% is held out for testing, then 15% of the remaining 85% (12.75% overall) for validation, leaving 72.25% for training\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, 
random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:31:43.035131Z","iopub.execute_input":"2023-11-04T20:31:43.035633Z","iopub.status.idle":"2023-11-04T20:31:43.047528Z","shell.execute_reply.started":"2023-11-04T20:31:43.035594Z","shell.execute_reply":"2023-11-04T20:31:43.046451Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# RF","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:33:21.215586Z","iopub.execute_input":"2023-11-04T20:33:21.216010Z","iopub.status.idle":"2023-11-04T20:33:21.844784Z","shell.execute_reply.started":"2023-11-04T20:33:21.215979Z","shell.execute_reply":"2023-11-04T20:33:21.843976Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:33:52.950706Z","iopub.execute_input":"2023-11-04T20:33:52.951131Z","iopub.status.idle":"2023-11-04T20:33:52.981738Z","shell.execute_reply.started":"2023-11-04T20:33:52.951099Z","shell.execute_reply":"2023-11-04T20:33:52.980638Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# GBRT","metadata":{}},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:35:08.256350Z","iopub.execute_input":"2023-11-04T20:35:08.256966Z","iopub.status.idle":"2023-11-04T20:35:08.564673Z","shell.execute_reply.started":"2023-11-04T20:35:08.256902Z","shell.execute_reply":"2023-11-04T20:35:08.563492Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, 
y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:36:11.211184Z","iopub.execute_input":"2023-11-04T20:36:11.211635Z","iopub.status.idle":"2023-11-04T20:36:11.225762Z","shell.execute_reply.started":"2023-11-04T20:36:11.211598Z","shell.execute_reply":"2023-11-04T20:36:11.224421Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGB","metadata":{}},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:38:36.416817Z","iopub.execute_input":"2023-11-04T20:38:36.417290Z","iopub.status.idle":"2023-11-04T20:38:36.668877Z","shell.execute_reply.started":"2023-11-04T20:38:36.417252Z","shell.execute_reply":"2023-11-04T20:38:36.667705Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, 
y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:39:43.017843Z","iopub.execute_input":"2023-11-04T20:39:43.018350Z","iopub.status.idle":"2023-11-04T20:39:43.035843Z","shell.execute_reply.started":"2023-11-04T20:39:43.018307Z","shell.execute_reply":"2023-11-04T20:39:43.035025Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# SVM","metadata":{}},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:41:20.388879Z","iopub.execute_input":"2023-11-04T20:41:20.389351Z","iopub.status.idle":"2023-11-04T20:41:22.246592Z","shell.execute_reply.started":"2023-11-04T20:41:20.389309Z","shell.execute_reply":"2023-11-04T20:41:22.245521Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = 
svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:41:58.017364Z","iopub.execute_input":"2023-11-04T20:41:58.017809Z","iopub.status.idle":"2023-11-04T20:41:58.110032Z","shell.execute_reply.started":"2023-11-04T20:41:58.017772Z","shell.execute_reply":"2023-11-04T20:41:58.108779Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Neural Networks","metadata":{}},{"cell_type":"code","source":"# Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:44:11.741377Z","iopub.execute_input":"2023-11-04T20:44:11.741830Z","iopub.status.idle":"2023-11-04T20:44:11.755907Z","shell.execute_reply.started":"2023-11-04T20:44:11.741799Z","shell.execute_reply":"2023-11-04T20:44:11.754681Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, 
y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:49:17.322766Z","iopub.execute_input":"2023-11-04T20:49:17.323234Z","iopub.status.idle":"2023-11-04T20:49:39.065326Z","shell.execute_reply.started":"2023-11-04T20:49:17.323197Z","shell.execute_reply":"2023-11-04T20:49:39.064517Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:50:02.348166Z","iopub.execute_input":"2023-11-04T20:50:02.349215Z","iopub.status.idle":"2023-11-04T20:50:19.975070Z","shell.execute_reply.started":"2023-11-04T20:50:02.349177Z","shell.execute_reply":"2023-11-04T20:50:19.973024Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    
layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:50:59.415307Z","iopub.execute_input":"2023-11-04T20:50:59.416065Z","iopub.status.idle":"2023-11-04T20:51:16.712927Z","shell.execute_reply.started":"2023-11-04T20:50:59.416016Z","shell.execute_reply":"2023-11-04T20:51:16.711697Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:14:57.990913Z","iopub.execute_input":"2023-11-04T21:14:57.991366Z","iopub.status.idle":"2023-11-04T21:15:15.450466Z","shell.execute_reply.started":"2023-11-04T21:14:57.991331Z","shell.execute_reply":"2023-11-04T21:15:15.449286Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn5)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:16:39.776639Z","iopub.execute_input":"2023-11-04T21:16:39.777054Z","iopub.status.idle":"2023-11-04T21:16:57.196617Z","shell.execute_reply.started":"2023-11-04T21:16:39.777020Z","shell.execute_reply":"2023-11-04T21:16:57.194278Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 6 Factor Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB', 'HML','rm-rf','UMD', 'CMA', 'RMW']]\ny = data['rt']\n# Split the data: 15% is held out for testing, then 15% of the remaining 85% (12.75% overall) for validation, leaving 72.25% for training\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:30.545455Z","iopub.execute_input":"2023-11-04T21:30:30.545904Z","iopub.status.idle":"2023-11-04T21:30:30.559247Z","shell.execute_reply.started":"2023-11-04T21:30:30.545874Z","shell.execute_reply":"2023-11-04T21:30:30.558164Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): 
{val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:41.910401Z","iopub.execute_input":"2023-11-04T21:30:41.910898Z","iopub.status.idle":"2023-11-04T21:30:42.649531Z","shell.execute_reply.started":"2023-11-04T21:30:41.910859Z","shell.execute_reply":"2023-11-04T21:30:42.648386Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, 
y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:55.932648Z","iopub.execute_input":"2023-11-04T21:30:55.933097Z","iopub.status.idle":"2023-11-04T21:30:56.288139Z","shell.execute_reply.started":"2023-11-04T21:30:55.933062Z","shell.execute_reply":"2023-11-04T21:30:56.286944Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:25.658673Z","iopub.execute_input":"2023-11-04T21:31:25.659129Z","iopub.status.idle":"2023-11-04T21:31:25.918903Z","shell.execute_reply.started":"2023-11-04T21:31:25.659097Z","shell.execute_reply":"2023-11-04T21:31:25.917985Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:42.411461Z","iopub.execute_input":"2023-11-04T21:31:42.412426Z","iopub.status.idle":"2023-11-04T21:31:44.932214Z","shell.execute_reply.started":"2023-11-04T21:31:42.412391Z","shell.execute_reply":"2023-11-04T21:31:44.930952Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:54.291349Z","iopub.execute_input":"2023-11-04T21:31:54.291765Z","iopub.status.idle":"2023-11-04T21:32:12.368425Z","shell.execute_reply.started":"2023-11-04T21:31:54.291735Z","shell.execute_reply":"2023-11-04T21:32:12.367160Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2 (fit the network built in this cell, not model_nn1)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# 
Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:32:50.574707Z","iopub.execute_input":"2023-11-04T21:32:50.575127Z","iopub.status.idle":"2023-11-04T21:33:08.021760Z","shell.execute_reply.started":"2023-11-04T21:32:50.575093Z","shell.execute_reply":"2023-11-04T21:33:08.020600Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3 (fit the network built in this cell, not model_nn1)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:33:17.269084Z","iopub.execute_input":"2023-11-04T21:33:17.269496Z","iopub.status.idle":"2023-11-04T21:33:34.531209Z","shell.execute_reply.started":"2023-11-04T21:33:17.269461Z","shell.execute_reply":"2023-11-04T21:33:34.530095Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = 
keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4 (fit the network built in this cell, not model_nn1)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:33:45.626823Z","iopub.execute_input":"2023-11-04T21:33:45.627226Z","iopub.status.idle":"2023-11-04T21:34:06.350248Z","shell.execute_reply.started":"2023-11-04T21:33:45.627196Z","shell.execute_reply":"2023-11-04T21:34:06.348748Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5 (fit the network built in this cell, not model_nn1)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = 
mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:34:16.582416Z","iopub.execute_input":"2023-11-04T21:34:16.582865Z","iopub.status.idle":"2023-11-04T21:34:33.932808Z","shell.execute_reply.started":"2023-11-04T21:34:16.582823Z","shell.execute_reply":"2023-11-04T21:34:33.931637Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Q-Factor Model","metadata":{}},{"cell_type":"code","source":"#q factor\n# Define independent variables (features) and dependent variable\nX = data[['rm-rf','rmf','rME', 'IA', 'ROE']]\ny = data['rt']\n# Hold out 15% of the rows for testing, then 15% of the remaining 85% for validation\n# (about 72.25% train / 12.75% validation / 15% test; pass test_size=0.15/0.85 to the second call for an exact 70/15/15 split)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:35:53.541197Z","iopub.execute_input":"2023-11-10T09:35:53.541658Z","iopub.status.idle":"2023-11-10T09:35:53.558176Z","shell.execute_reply.started":"2023-11-10T09:35:53.541625Z","shell.execute_reply":"2023-11-10T09:35:53.556953Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, 
y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:01.634076Z","iopub.execute_input":"2023-11-10T09:36:01.634464Z","iopub.status.idle":"2023-11-10T09:36:02.308860Z","shell.execute_reply.started":"2023-11-10T09:36:01.634436Z","shell.execute_reply":"2023-11-10T09:36:02.307607Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = 
gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:10.759137Z","iopub.execute_input":"2023-11-10T09:36:10.759536Z","iopub.status.idle":"2023-11-10T09:36:11.093456Z","shell.execute_reply.started":"2023-11-10T09:36:10.759506Z","shell.execute_reply":"2023-11-10T09:36:11.092178Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, 
y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:19.407726Z","iopub.execute_input":"2023-11-10T09:36:19.409109Z","iopub.status.idle":"2023-11-10T09:36:19.753506Z","shell.execute_reply.started":"2023-11-10T09:36:19.409060Z","shell.execute_reply":"2023-11-10T09:36:19.752364Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:27.708452Z","iopub.execute_input":"2023-11-10T09:36:27.708853Z","iopub.status.idle":"2023-11-10T09:36:29.645863Z","shell.execute_reply.started":"2023-11-10T09:36:27.708822Z","shell.execute_reply":"2023-11-10T09:36:29.644606Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:37:10.614772Z","iopub.execute_input":"2023-11-10T09:37:10.615250Z","iopub.status.idle":"2023-11-10T09:37:29.666755Z","shell.execute_reply.started":"2023-11-10T09:37:10.615213Z","shell.execute_reply":"2023-11-10T09:37:29.665411Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2 (fit the network built in this cell, not model_nn1)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = 
model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:37:47.686931Z","iopub.execute_input":"2023-11-10T09:37:47.687353Z","iopub.status.idle":"2023-11-10T09:38:05.828271Z","shell.execute_reply.started":"2023-11-10T09:37:47.687316Z","shell.execute_reply":"2023-11-10T09:38:05.826961Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3 (fit the network built in this cell, not model_nn1)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:38:16.148196Z","iopub.execute_input":"2023-11-10T09:38:16.148601Z","iopub.status.idle":"2023-11-10T09:38:33.864632Z","shell.execute_reply.started":"2023-11-10T09:38:16.148570Z","shell.execute_reply":"2023-11-10T09:38:33.863464Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4 (fit the network built in this cell, not model_nn1)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:38:51.473183Z","iopub.execute_input":"2023-11-10T09:38:51.474198Z","iopub.status.idle":"2023-11-10T09:39:12.194945Z","shell.execute_reply.started":"2023-11-10T09:38:51.474151Z","shell.execute_reply":"2023-11-10T09:39:12.193652Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # For regression 
tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5 (fit the network built in this cell, not model_nn1)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:49:07.450901Z","iopub.execute_input":"2023-11-04T21:49:07.451360Z","iopub.status.idle":"2023-11-04T21:49:24.680952Z","shell.execute_reply.started":"2023-11-04T21:49:07.451323Z","shell.execute_reply":"2023-11-04T21:49:24.679794Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Q-Factor Model 2","metadata":{}},{"cell_type":"code","source":"#q factor 2\n# Define independent variables (features) and dependent variable\nX = data[['rm-rf','rmf','rME', 'IA', 'ROE', 'ROEG']]\ny = data['rt']\n# Hold out 15% of the rows for testing, then 15% of the remaining 85% for validation\n# (about 72.25% train / 12.75% validation / 15% test; pass test_size=0.15/0.85 to the second call for an exact 70/15/15 split)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:40:10.890094Z","iopub.execute_input":"2023-11-10T12:40:10.891309Z","iopub.status.idle":"2023-11-10T12:40:10.903034Z","shell.execute_reply.started":"2023-11-10T12:40:10.891243Z","shell.execute_reply":"2023-11-10T12:40:10.901766Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=50)\n# Fit the model to the 
training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:40:18.126074Z","iopub.execute_input":"2023-11-10T12:40:18.126515Z","iopub.status.idle":"2023-11-10T12:40:18.828497Z","shell.execute_reply.started":"2023-11-10T12:40:18.126473Z","shell.execute_reply":"2023-11-10T12:40:18.827318Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = 
np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:09:37.813602Z","iopub.execute_input":"2023-11-04T22:09:37.814012Z","iopub.status.idle":"2023-11-04T22:09:38.180533Z","shell.execute_reply.started":"2023-11-04T22:09:37.813980Z","shell.execute_reply":"2023-11-04T22:09:38.179174Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# XGBoost regressor: fit on the training split, then report R2, RMSE and MAE on the\n# validation split and on the test split.\n# NOTE: y_val_pred, y_test_pred, val_* and test_* are shared names that the GBRT, XGB and\n# SVM cells all re-assign, so they always hold the results of whichever of those cells ran last.\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=120, random_state=57)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error 
(MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:41:47.635545Z","iopub.execute_input":"2023-11-10T12:41:47.635979Z","iopub.status.idle":"2023-11-10T12:41:47.922038Z","shell.execute_reply.started":"2023-11-10T12:41:47.635944Z","shell.execute_reply":"2023-11-10T12:41:47.920917Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Support vector regression with a linear kernel and C=1.0, evaluated on the validation\n# and test splits with the same three metrics as the cells above.\n# NOTE: the Correlation section further down predicts with svr_model and reads y_test_pred,\n# so it depends on the state this cell leaves behind.\n# NOTE(review): SVR is sensitive to feature scale - confirm X_train was standardized upstream.\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute 
Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:00.656822Z","iopub.execute_input":"2023-11-04T22:10:00.658100Z","iopub.status.idle":"2023-11-04T22:10:02.709745Z","shell.execute_reply.started":"2023-11-04T22:10:00.658060Z","shell.execute_reply":"2023-11-04T22:10:02.708539Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\n# Feed-forward network with one hidden layer of 32 ReLU units and a single output unit,\n# compiled with the Adam optimizer and a mean-squared-error loss, trained for 100 epochs.\n# Results are stored under *_nn1 names, so they do not overwrite y_val_pred / y_test_pred.\nmodel_nn1 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(1)  # single output unit, no activation (regression)\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1 (verbose=0 silences the per-epoch progress output)\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1 on the validation data\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\n# The block printed under 'NN1 Metrics:' is the validation-set result\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n\n# Make predictions on the test set for NN1\ny_test_pred_nn1 = model_nn1.predict(X_test)\n# Calculate R2, RMSE, and MAE for NN1 on the test data\nr2_nn1_test = r2_score(y_test, y_test_pred_nn1)\nrmse_nn1_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn1))\nmae_nn1_test = mean_absolute_error(y_test, y_test_pred_nn1)\n\nprint(\"NN1 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1_test)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn1_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:14.635019Z","iopub.execute_input":"2023-11-04T22:10:14.635424Z","iopub.status.idle":"2023-11-04T22:10:33.867096Z","shell.execute_reply.started":"2023-11-04T22:10:14.635391Z","shell.execute_reply":"2023-11-04T22:10:33.865925Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from tensorflow.keras.utils import plot_model\n# Render the layer graph of model_nn1, with tensor shapes and layer names, to model_nn1.png\nplot_model(model_nn1, to_file='model_nn1.png', show_shapes=True, show_layer_names=True)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:15:17.133029Z","iopub.execute_input":"2023-11-04T22:15:17.133710Z","iopub.status.idle":"2023-11-04T22:15:17.201557Z","shell.execute_reply.started":"2023-11-04T22:15:17.133675Z","shell.execute_reply":"2023-11-04T22:15:17.200697Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pydot\nfrom keras.models import Sequential\nfrom keras.layers import Dense\nfrom keras.utils import plot_model","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:20:53.933509Z","iopub.execute_input":"2023-11-04T22:20:53.933914Z","iopub.status.idle":"2023-11-04T22:20:53.939468Z","shell.execute_reply.started":"2023-11-04T22:20:53.933884Z","shell.execute_reply":"2023-11-04T22:20:53.938281Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Install ann_visualizer into the environment of the running kernel.\n# %pip is the explicit IPython magic; a bare 'pip install' line only works while automagic is enabled.\n%pip install ann_visualizer","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:26:30.483273Z","iopub.execute_input":"2023-11-04T22:26:30.483684Z","iopub.status.idle":"2023-11-04T22:26:46.783985Z","shell.execute_reply.started":"2023-11-04T22:26:30.483652Z","shell.execute_reply":"2023-11-04T22:26:46.782625Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, 
activation='relu'),\n    layers.Dense(1)  # single output unit, no activation (regression)\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2: fit the model defined in this cell (model_nn2), not model_nn1\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2 on the validation data\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n# Make predictions on the test set for NN2\ny_test_pred_nn2 = model_nn2.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN2 on the test data\nr2_nn2_test = r2_score(y_test, y_test_pred_nn2)\nrmse_nn2_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn2))\nmae_nn2_test = mean_absolute_error(y_test, y_test_pred_nn2)\n\nprint(\"NN2 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:45.938901Z","iopub.execute_input":"2023-11-04T22:10:45.939396Z","iopub.status.idle":"2023-11-04T22:11:04.625656Z","shell.execute_reply.started":"2023-11-04T22:10:45.939358Z","shell.execute_reply":"2023-11-04T22:11:04.624499Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\n# Three hidden ReLU layers (32, 16 and 8 units) followed by one output unit.\nmodel_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1)  # single output unit, no activation (regression)\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3: fit the model defined in this cell (model_nn3), not model_nn1\nmodel_nn3.fit(X_train, y_train, 
epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3 on the validation data\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)\n# Make predictions on the test set for NN3\ny_test_pred_nn3 = model_nn3.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN3 on the test data\nr2_nn3_test = r2_score(y_test, y_test_pred_nn3)\nrmse_nn3_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn3))\nmae_nn3_test = mean_absolute_error(y_test, y_test_pred_nn3)\n\nprint(\"NN3 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:11:16.299197Z","iopub.execute_input":"2023-11-04T22:11:16.299658Z","iopub.status.idle":"2023-11-04T22:11:34.444833Z","shell.execute_reply.started":"2023-11-04T22:11:16.299622Z","shell.execute_reply":"2023-11-04T22:11:34.443635Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\n# Four hidden ReLU layers (32, 16, 8 and 4 units) followed by one output unit.\nmodel_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1)  # single output unit, no activation (regression)\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4: fit the model defined in this cell (model_nn4), not model_nn1\nmodel_nn4.fit(X_train, y_train, 
epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4 on the validation data\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n# Make predictions on the test set for NN4\ny_test_pred_nn4 = model_nn4.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN4 on the test data\nr2_nn4_test = r2_score(y_test, y_test_pred_nn4)\nrmse_nn4_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn4))\nmae_nn4_test = mean_absolute_error(y_test, y_test_pred_nn4)\n\nprint(\"NN4 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:11:45.900922Z","iopub.execute_input":"2023-11-04T22:11:45.901370Z","iopub.status.idle":"2023-11-04T22:12:04.427192Z","shell.execute_reply.started":"2023-11-04T22:11:45.901334Z","shell.execute_reply":"2023-11-04T22:12:04.426314Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\n# Five hidden ReLU layers (32, 16, 8, 4 and 2 units) followed by one output unit.\nmodel_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1)  # single output unit, no activation (regression)\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5: fit the model defined in this cell (model_nn5), not model_nn1\nmodel_nn5.fit(X_train, y_train, 
epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5 on the validation data\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)\n# Make predictions on the test set for NN5\ny_test_pred_nn5 = model_nn5.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN5 on the test data\nr2_nn5_test = r2_score(y_test, y_test_pred_nn5)\nrmse_nn5_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn5))\nmae_nn5_test = mean_absolute_error(y_test, y_test_pred_nn5)\n\nprint(\"NN5 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:12:16.183984Z","iopub.execute_input":"2023-11-04T22:12:16.185053Z","iopub.status.idle":"2023-11-04T22:12:34.721042Z","shell.execute_reply.started":"2023-11-04T22:12:16.185016Z","shell.execute_reply":"2023-11-04T22:12:34.720061Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Correlation","metadata":{}},{"cell_type":"code","source":"# Predict on the training set with the SVR model\ny_train_pred = svr_model.predict(X_train)\n\n# Calculate the correlation between predicted and actual values in the training set\n# (np.corrcoef returns a 2x2 matrix; element [0, 1] is the coefficient between the two inputs)\nin_sample_correlation = np.corrcoef(y_train, y_train_pred)[0, 1]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:42:25.176562Z","iopub.execute_input":"2023-11-04T18:42:25.176994Z","iopub.status.idle":"2023-11-04T18:42:25.552089Z","shell.execute_reply.started":"2023-11-04T18:42:25.176960Z","shell.execute_reply":"2023-11-04T18:42:25.551070Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Out-of-sample counterpart of the in-sample figure: score the same model (svr_model) on the\n# held-out test set. Predicting here, instead of reusing whatever y_test_pred an earlier\n# model cell left behind, keeps both correlations tied to one model.\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate the correlation between predicted and actual values in the test set\nout_of_sample_correlation = np.corrcoef(y_test, y_test_pred)[0, 
1]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:44:25.712299Z","iopub.execute_input":"2023-11-04T18:44:25.712712Z","iopub.status.idle":"2023-11-04T18:44:25.718603Z","shell.execute_reply.started":"2023-11-04T18:44:25.712679Z","shell.execute_reply":"2023-11-04T18:44:25.717726Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Print both correlations\nprint(\"In-Sample Correlation:\", in_sample_correlation)\nprint(\"Out-of-Sample Correlation:\", out_of_sample_correlation)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:44:29.978779Z","iopub.execute_input":"2023-11-04T18:44:29.979346Z","iopub.status.idle":"2023-11-04T18:44:29.986222Z","shell.execute_reply.started":"2023-11-04T18:44:29.979292Z","shell.execute_reply":"2023-11-04T18:44:29.985120Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Bar chart comparing the in-sample and out-of-sample correlations.\n# NOTE(review): plt must already be imported when this cell runs; in this part of the notebook\n# the only import of matplotlib.pyplot comes in a later cell - confirm an earlier one exists.\ncorrelations = [in_sample_correlation, out_of_sample_correlation]\nlabels = ['In-Sample', 'Out-of-Sample']\nx = np.arange(len(labels))  # one bar position per label\n# Draw on a figure of its own; plt.subplot(1, 2, 2) would leave the left half of the figure empty\nplt.figure()\nplt.bar(x, correlations, color=['blue', 'green'])\nplt.xticks(x, labels)\nplt.ylabel('Correlation Coefficient')\nplt.title('In-Sample vs. 
Out-of-Sample Correlations')\n\nplt.tight_layout()  # Adjust layout for readability\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:47:30.085857Z","iopub.execute_input":"2023-11-04T18:47:30.086253Z","iopub.status.idle":"2023-11-04T18:47:30.424591Z","shell.execute_reply.started":"2023-11-04T18:47:30.086223Z","shell.execute_reply":"2023-11-04T18:47:30.423276Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Descriptive Statistics","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nimport seaborn as sns","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:01:55.397824Z","iopub.execute_input":"2023-11-09T13:01:55.398181Z","iopub.status.idle":"2023-11-09T13:01:55.740078Z","shell.execute_reply.started":"2023-11-09T13:01:55.398155Z","shell.execute_reply":"2023-11-09T13:01:55.738559Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Preview the first rows of the dataset\ndata.head()","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:15.348406Z","iopub.execute_input":"2023-11-09T13:02:15.348795Z","iopub.status.idle":"2023-11-09T13:02:15.370575Z","shell.execute_reply.started":"2023-11-09T13:02:15.348765Z","shell.execute_reply":"2023-11-09T13:02:15.369277Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a new variable 'rm-rf' by subtracting 'rf' from 'rm'\n# (presumably the market return in excess of the risk-free rate - confirm against the dataset)\ndata['rm-rf'] = data['rm'] - data['rf']","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:18.988633Z","iopub.execute_input":"2023-11-09T13:02:18.989231Z","iopub.status.idle":"2023-11-09T13:02:18.993605Z","shell.execute_reply.started":"2023-11-09T13:02:18.989202Z","shell.execute_reply":"2023-11-09T13:02:18.992773Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Select the variables you want to include in the correlation matrix\nfactors = ['rm-rf', 
'SMB','HML','CMA','UMD','RMW','rME','IA','ROE','ROEG']","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:25.003477Z","iopub.execute_input":"2023-11-09T13:02:25.003848Z","iopub.status.idle":"2023-11-09T13:02:25.009219Z","shell.execute_reply.started":"2023-11-09T13:02:25.003820Z","shell.execute_reply":"2023-11-09T13:02:25.007656Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Pairwise correlations between the selected factor columns\ncorr_matrix = data.loc[:, factors].corr()\ncorr_matrix","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:35.653707Z","iopub.execute_input":"2023-11-09T13:02:35.654069Z","iopub.status.idle":"2023-11-09T13:02:35.680366Z","shell.execute_reply.started":"2023-11-09T13:02:35.654046Z","shell.execute_reply":"2023-11-09T13:02:35.679318Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Heatmap of the factor correlation matrix, each cell annotated with its coefficient\nplt.figure(figsize=(8, 6))\nsns.heatmap(corr_matrix, cmap='coolwarm', annot=True, linewidths=0.5)\nplt.title('Correlation Plot for Selected Factors', pad=20)  # pad sets the gap between the title and the axes\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-08T20:10:22.186529Z","iopub.execute_input":"2023-11-08T20:10:22.188010Z","iopub.status.idle":"2023-11-08T20:10:22.882816Z","shell.execute_reply.started":"2023-11-08T20:10:22.187937Z","shell.execute_reply":"2023-11-08T20:10:22.881073Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# plots","metadata":{}},{"cell_type":"code","source":"#q factor 2\n# Feature columns (X) and the target column 'rt' (y)\nfeature_columns = ['rm-rf','rmf','rME', 'IA', 'ROE', 'ROEG']\nX = data[feature_columns]\ny = data['rt']\n# Hold out 15% of the rows for testing, then 15% of the remainder for validation\n# (the validation split is created here but not used by the models below)\nX_rest, X_test, y_rest, y_test = train_test_split(X, y, 
test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=0.15, random_state=42)\n\n# Each model below is fitted on the training split and then predicts the test split\n# Random forest, 100 trees\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)\ny_test_pred_rf = rf_model.predict(X_test)\n###GBRT\n# Gradient-boosted regression trees, 100 estimators\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)\ny_test_pred_gbrt = gbrt_model.predict(X_test)\n###XGB\n# XGBoost regressor, 100 estimators\nxgb_model = XGBRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)\ny_test_pred_xgb = xgb_model.predict(X_test)\n###SVM\n# Support vector regression with a linear kernel\nsvr_model = SVR(kernel='linear', C=1.0).fit(X_train, y_train)\ny_test_pred_svr = svr_model.predict(X_test)\n","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# One 2x2 figure with a predicted-vs-actual scatter panel per model\nplt.figure(figsize=(10, 8))\n# Main title for the whole figure\nplt.suptitle('Comparison of Q-Factors Model Predictions', 
fontsize=16, y=1.02)\n\n# End points of the dashed red diagonal on which a perfect prediction would fall\nlo, hi = min(y_test), max(y_test)\n\n# Panel label and test-set predictions, in subplot order\npanels = [\n    ('RF', y_test_pred_rf),\n    ('GBRT', y_test_pred_gbrt),\n    ('XGBoost', y_test_pred_xgb),\n    ('SVM', y_test_pred_svr),\n]\nfor position, (label, predictions) in enumerate(panels, start=1):\n    plt.subplot(2, 2, position)\n    plt.scatter(y_test, predictions, alpha=0.5)\n    plt.plot([lo, hi], [lo, hi], color='red', linestyle='--')\n    plt.title(f'{label} Testing: Predicted vs. Actual Returns')\n    plt.xlabel('Returns')\n    plt.ylabel('Predicted Returns')\n\nplt.tight_layout()\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-09T14:11:48.736716Z","iopub.execute_input":"2023-11-09T14:11:48.737086Z","iopub.status.idle":"2023-11-09T14:11:49.524759Z","shell.execute_reply.started":"2023-11-09T14:11:48.737058Z","shell.execute_reply":"2023-11-09T14:11:49.523323Z"},"trusted":true},"execution_count":null,"outputs":[]}]}