Created
November 15, 2023 10:40
-
-
Save riqbal-k/c3d9aeb65c55ab3312a429f1a206ab9c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":6855131,"sourceType":"datasetVersion","datasetId":3940311}],"dockerImageVersionId":30558,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2023-11-10T09:25:23.060869Z","iopub.execute_input":"2023-11-10T09:25:23.062312Z","iopub.status.idle":"2023-11-10T09:25:23.073329Z","shell.execute_reply.started":"2023-11-10T09:25:23.062265Z","shell.execute_reply":"2023-11-10T09:25:23.072151Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom sklearn.ensemble import RandomForestRegressor","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:16:54.914077Z","iopub.execute_input":"2023-11-10T12:16:54.915621Z","iopub.status.idle":"2023-11-10T12:16:59.705939Z","shell.execute_reply.started":"2023-11-10T12:16:54.915495Z","shell.execute_reply":"2023-11-10T12:16:59.704678Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import xgboost as xgb\nfrom xgboost import XGBRegressor\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.svm import SVR","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:06.041818Z","iopub.execute_input":"2023-11-10T12:17:06.042682Z","iopub.status.idle":"2023-11-10T12:17:06.217633Z","shell.execute_reply.started":"2023-11-10T12:17:06.042646Z","shell.execute_reply":"2023-11-10T12:17:06.216510Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom tensorflow import keras\nfrom tensorflow.keras import layers\nimport torch\nimport tensorflow as 
tf","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:12.892539Z","iopub.execute_input":"2023-11-10T12:17:12.892955Z","iopub.status.idle":"2023-11-10T12:17:21.863478Z","shell.execute_reply.started":"2023-11-10T12:17:12.892921Z","shell.execute_reply":"2023-11-10T12:17:21.862219Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Importing the dataset\ndata = pd.read_csv('/kaggle/input/pool23/pool2.csv')","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:21.865359Z","iopub.execute_input":"2023-11-10T12:17:21.866309Z","iopub.status.idle":"2023-11-10T12:17:21.927231Z","shell.execute_reply.started":"2023-11-10T12:17:21.866271Z","shell.execute_reply":"2023-11-10T12:17:21.925959Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data.head()","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:23.953472Z","iopub.execute_input":"2023-11-10T12:17:23.954600Z","iopub.status.idle":"2023-11-10T12:17:23.987854Z","shell.execute_reply.started":"2023-11-10T12:17:23.954559Z","shell.execute_reply":"2023-11-10T12:17:23.987002Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 3-factor model","metadata":{}},{"cell_type":"code","source":"# Create a new variable 'rm-rf' by subtracting 'rf' from 'rm'\ndata['rm-rf'] = data['rm'] - data['rf']\n# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:31.863994Z","iopub.execute_input":"2023-11-10T12:17:31.865356Z","iopub.status.idle":"2023-11-10T12:17:31.879937Z","shell.execute_reply.started":"2023-11-10T12:17:31.865309Z","shell.execute_reply":"2023-11-10T12:17:31.879056Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size=0.15, random_state=42)\n# 0.15 / 0.85 of the remaining 85% equals 15% of the full dataset, which yields the 70/15/15 split described above\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15 / 0.85, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:47.898997Z","iopub.execute_input":"2023-11-10T12:17:47.899490Z","iopub.status.idle":"2023-11-10T12:17:47.909695Z","shell.execute_reply.started":"2023-11-10T12:17:47.899432Z","shell.execute_reply":"2023-11-10T12:17:47.908420Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Random Forest model","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:17:52.488292Z","iopub.execute_input":"2023-11-10T12:17:52.489687Z","iopub.status.idle":"2023-11-10T12:17:52.963711Z","shell.execute_reply.started":"2023-11-10T12:17:52.489630Z","shell.execute_reply":"2023-11-10T12:17:52.962635Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): 
{val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:01.018281Z","iopub.execute_input":"2023-11-10T12:18:01.019589Z","iopub.status.idle":"2023-11-10T12:18:01.046884Z","shell.execute_reply.started":"2023-11-10T12:18:01.019527Z","shell.execute_reply":"2023-11-10T12:18:01.045549Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:05.135231Z","iopub.execute_input":"2023-11-10T12:18:05.136256Z","iopub.status.idle":"2023-11-10T12:18:05.160278Z","shell.execute_reply.started":"2023-11-10T12:18:05.136214Z","shell.execute_reply":"2023-11-10T12:18:05.159061Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# gradient boosting regression tree (GBRT)","metadata":{}},{"cell_type":"code","source":"import pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nimport numpy as 
np","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:14.784485Z","iopub.execute_input":"2023-11-10T12:18:14.784911Z","iopub.status.idle":"2023-11-10T12:18:14.790509Z","shell.execute_reply.started":"2023-11-10T12:18:14.784875Z","shell.execute_reply":"2023-11-10T12:18:14.789512Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:18.978476Z","iopub.execute_input":"2023-11-10T12:18:18.978911Z","iopub.status.idle":"2023-11-10T12:18:19.212039Z","shell.execute_reply.started":"2023-11-10T12:18:18.978875Z","shell.execute_reply":"2023-11-10T12:18:19.211248Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): 
{test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:27.965519Z","iopub.execute_input":"2023-11-10T12:18:27.966041Z","iopub.status.idle":"2023-11-10T12:18:27.980975Z","shell.execute_reply.started":"2023-11-10T12:18:27.966000Z","shell.execute_reply":"2023-11-10T12:18:27.979729Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGBoost (XGB)","metadata":{}},{"cell_type":"code","source":"import xgboost as xgb\nfrom xgboost import XGBRegressor","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:27:56.825898Z","iopub.execute_input":"2023-11-10T09:27:56.827005Z","iopub.status.idle":"2023-11-10T09:27:56.831441Z","shell.execute_reply.started":"2023-11-10T09:27:56.826958Z","shell.execute_reply":"2023-11-10T09:27:56.830597Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): 
{val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:39.566020Z","iopub.execute_input":"2023-11-10T12:18:39.567239Z","iopub.status.idle":"2023-11-10T12:18:39.797528Z","shell.execute_reply.started":"2023-11-10T12:18:39.567198Z","shell.execute_reply":"2023-11-10T12:18:39.796397Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:18:53.921918Z","iopub.execute_input":"2023-11-10T12:18:53.922350Z","iopub.status.idle":"2023-11-10T12:18:53.935099Z","shell.execute_reply.started":"2023-11-10T12:18:53.922316Z","shell.execute_reply":"2023-11-10T12:18:53.934282Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Support Vector Machine","metadata":{}},{"cell_type":"code","source":"from sklearn.svm import SVR","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:04.831700Z","iopub.execute_input":"2023-11-10T12:19:04.832107Z","iopub.status.idle":"2023-11-10T12:19:04.837889Z","shell.execute_reply.started":"2023-11-10T12:19:04.832066Z","shell.execute_reply":"2023-11-10T12:19:04.835706Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred 
= svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:08.722721Z","iopub.execute_input":"2023-11-10T12:19:08.723610Z","iopub.status.idle":"2023-11-10T12:19:10.196092Z","shell.execute_reply.started":"2023-11-10T12:19:08.723571Z","shell.execute_reply":"2023-11-10T12:19:10.195130Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:16.305538Z","iopub.execute_input":"2023-11-10T12:19:16.305947Z","iopub.status.idle":"2023-11-10T12:19:16.386902Z","shell.execute_reply.started":"2023-11-10T12:19:16.305916Z","shell.execute_reply":"2023-11-10T12:19:16.385609Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# neural networks","metadata":{}},{"cell_type":"markdown","source":"# NN1 (One Hidden Layer with 32 Neurons):","metadata":{}},{"cell_type":"code","source":"# 
Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:35.569580Z","iopub.execute_input":"2023-11-10T12:19:35.570349Z","iopub.status.idle":"2023-11-10T12:19:35.585206Z","shell.execute_reply.started":"2023-11-10T12:19:35.570312Z","shell.execute_reply":"2023-11-10T12:19:35.584072Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"model_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:39.589482Z","iopub.execute_input":"2023-11-10T12:19:39.589901Z","iopub.status.idle":"2023-11-10T12:19:39.776938Z","shell.execute_reply.started":"2023-11-10T12:19:39.589868Z","shell.execute_reply":"2023-11-10T12:19:39.775818Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:19:46.791486Z","iopub.execute_input":"2023-11-10T12:19:46.791908Z","iopub.status.idle":"2023-11-10T12:20:02.614199Z","shell.execute_reply.started":"2023-11-10T12:19:46.791874Z","shell.execute_reply":"2023-11-10T12:20:02.613235Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN2 (Two Hidden Layers with 32 and 16 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn2 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:05.543593Z","iopub.execute_input":"2023-11-10T09:33:05.544045Z","iopub.status.idle":"2023-11-10T09:33:05.604562Z","shell.execute_reply.started":"2023-11-10T09:33:05.544011Z","shell.execute_reply":"2023-11-10T09:33:05.603194Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN2 (fit model_nn2 itself; fitting model_nn1 here would leave NN2 untrained)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:14.871752Z","iopub.execute_input":"2023-11-10T09:33:14.872189Z","iopub.status.idle":"2023-11-10T09:33:31.879123Z","shell.execute_reply.started":"2023-11-10T09:33:14.872148Z","shell.execute_reply":"2023-11-10T09:33:31.878010Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN3 (Three Hidden Layers with 32, 16, and 8 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn3 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:52.170576Z","iopub.execute_input":"2023-11-10T09:33:52.171184Z","iopub.status.idle":"2023-11-10T09:33:52.244431Z","shell.execute_reply.started":"2023-11-10T09:33:52.171147Z","shell.execute_reply":"2023-11-10T09:33:52.243206Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN3 (fit model_nn3 itself; fitting model_nn1 here would leave NN3 untrained)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:33:58.925574Z","iopub.execute_input":"2023-11-10T09:33:58.926019Z","iopub.status.idle":"2023-11-10T09:34:16.876879Z","shell.execute_reply.started":"2023-11-10T09:33:58.925985Z","shell.execute_reply":"2023-11-10T09:34:16.875614Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN4 (Four Hidden Layers with 32, 16, 8, and 4 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn4 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:16.878993Z","iopub.execute_input":"2023-11-10T09:34:16.879330Z","iopub.status.idle":"2023-11-10T09:34:16.960631Z","shell.execute_reply.started":"2023-11-10T09:34:16.879301Z","shell.execute_reply":"2023-11-10T09:34:16.959306Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN4 (fit model_nn4 itself; fitting model_nn1 here would leave NN4 untrained)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn4)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:22.836877Z","iopub.execute_input":"2023-11-10T09:34:22.837415Z","iopub.status.idle":"2023-11-10T09:34:40.655170Z","shell.execute_reply.started":"2023-11-10T09:34:22.837380Z","shell.execute_reply":"2023-11-10T09:34:40.654114Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# NN5 (Five Hidden Layers with 32, 16, 8, 4, and 2 Neurons):","metadata":{}},{"cell_type":"code","source":"model_nn5 = keras.Sequential([\n    layers.Input(shape=(X_train.shape[1],)),\n    layers.Dense(32, activation='relu'),\n    layers.Dense(16, activation='relu'),\n    layers.Dense(8, activation='relu'),\n    layers.Dense(4, activation='relu'),\n    layers.Dense(2, activation='relu'),\n    layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:40.759517Z","iopub.execute_input":"2023-11-10T09:34:40.760203Z","iopub.status.idle":"2023-11-10T09:34:40.847946Z","shell.execute_reply.started":"2023-11-10T09:34:40.760160Z","shell.execute_reply":"2023-11-10T09:34:40.846818Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train NN5 (fit model_nn5 itself; fitting model_nn1 here would leave NN5 untrained)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T19:57:04.688156Z","iopub.execute_input":"2023-11-04T19:57:04.688624Z","iopub.status.idle":"2023-11-04T19:57:21.915022Z","shell.execute_reply.started":"2023-11-04T19:57:04.688588Z","shell.execute_reply":"2023-11-04T19:57:21.913796Z"},"jupyter":{"source_hidden":true},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Plot","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:42.784168Z","iopub.execute_input":"2023-11-10T09:34:42.784565Z","iopub.status.idle":"2023-11-10T09:34:42.790085Z","shell.execute_reply.started":"2023-11-10T09:34:42.784534Z","shell.execute_reply":"2023-11-10T09:34:42.788748Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Get feature importances from the RF model\nfeature_importances = rf_model.feature_importances_\n# Define the names of your factor variables\nfactor_names = ['SMB3', 'HML3', 'rm-rf']\n# Create a DataFrame to store feature importances along with factor names\nimportance_df = pd.DataFrame({'Factor': factor_names, 'Importance': feature_importances})\n\n# Sort the DataFrame by importance values in descending order\nimportance_df = importance_df.sort_values(by='Importance', ascending=False)\n\n# Create a bar plot to visualize feature importances\nplt.figure(figsize=(10, 6))\nplt.barh(importance_df['Factor'], importance_df['Importance'], color='skyblue')\nplt.xlabel('Importance')\n# rf_model at this point is the one fitted on the three factors listed in factor_names\nplt.title('Feature Importances for 3-Factor Model')\nplt.gca().invert_yaxis() # Invert the y-axis to show the most important factors at the 
top\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:34:46.895496Z","iopub.execute_input":"2023-11-10T09:34:46.895924Z","iopub.status.idle":"2023-11-10T09:34:47.209392Z","shell.execute_reply.started":"2023-11-10T09:34:46.895875Z","shell.execute_reply":"2023-11-10T09:34:47.208110Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 4-Factors Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf', 'UMD']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-04T17:58:23.899568Z","iopub.execute_input":"2023-11-04T17:58:23.899963Z","iopub.status.idle":"2023-11-04T17:58:23.908414Z","shell.execute_reply.started":"2023-11-04T17:58:23.899932Z","shell.execute_reply":"2023-11-04T17:58:23.907250Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB3', 'HML3','rm-rf', 'UMD']]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:00:10.576530Z","iopub.execute_input":"2023-11-04T18:00:10.576919Z","iopub.status.idle":"2023-11-04T18:00:10.583889Z","shell.execute_reply.started":"2023-11-04T18:00:10.576890Z","shell.execute_reply":"2023-11-04T18:00:10.582615Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\n# 0.15 / 0.85 of the remaining 85% equals 15% of the full dataset, which yields the 70/15/15 split stated above\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15 / 0.85, 
random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:00:31.552473Z","iopub.execute_input":"2023-11-04T18:00:31.552910Z","iopub.status.idle":"2023-11-04T18:00:31.563101Z","shell.execute_reply.started":"2023-11-04T18:00:31.552876Z","shell.execute_reply":"2023-11-04T18:00:31.562110Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Random Forest model","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:02:47.792828Z","iopub.execute_input":"2023-11-04T18:02:47.793961Z","iopub.status.idle":"2023-11-04T18:02:48.364205Z","shell.execute_reply.started":"2023-11-04T18:02:47.793914Z","shell.execute_reply":"2023-11-04T18:02:48.363409Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:03:17.445535Z","iopub.execute_input":"2023-11-04T18:03:17.445935Z","iopub.status.idle":"2023-11-04T18:03:17.477398Z","shell.execute_reply.started":"2023-11-04T18:03:17.445906Z","shell.execute_reply":"2023-11-04T18:03:17.476285Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# gradient boosting regression tree (GBRT)","metadata":{}},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:04:24.398085Z","iopub.execute_input":"2023-11-04T18:04:24.398522Z","iopub.status.idle":"2023-11-04T18:04:24.679923Z","shell.execute_reply.started":"2023-11-04T18:04:24.398487Z","shell.execute_reply":"2023-11-04T18:04:24.678587Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = 
np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:05:12.590610Z","iopub.execute_input":"2023-11-04T18:05:12.591015Z","iopub.status.idle":"2023-11-04T18:05:12.605723Z","shell.execute_reply.started":"2023-11-04T18:05:12.590984Z","shell.execute_reply":"2023-11-04T18:05:12.604514Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGBoost (XGB)","metadata":{}},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:07:06.144856Z","iopub.execute_input":"2023-11-04T18:07:06.145265Z","iopub.status.idle":"2023-11-04T18:07:06.370932Z","shell.execute_reply.started":"2023-11-04T18:07:06.145234Z","shell.execute_reply":"2023-11-04T18:07:06.369777Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing 
R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:07:35.560571Z","iopub.execute_input":"2023-11-04T18:07:35.560996Z","iopub.status.idle":"2023-11-04T18:07:35.574935Z","shell.execute_reply.started":"2023-11-04T18:07:35.560960Z","shell.execute_reply":"2023-11-04T18:07:35.573767Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Support Vector Machine","metadata":{}},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:08:45.523060Z","iopub.execute_input":"2023-11-04T18:08:45.523503Z","iopub.status.idle":"2023-11-04T18:08:47.480828Z","shell.execute_reply.started":"2023-11-04T18:08:45.523465Z","shell.execute_reply":"2023-11-04T18:08:47.479626Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, 
evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:09:14.836861Z","iopub.execute_input":"2023-11-04T18:09:14.837247Z","iopub.status.idle":"2023-11-04T18:09:14.923515Z","shell.execute_reply.started":"2023-11-04T18:09:14.837217Z","shell.execute_reply":"2023-11-04T18:09:14.922369Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# neural networks","metadata":{}},{"cell_type":"code","source":"import torch\nimport torch.nn as nn\nimport torch.optim as optim","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:33.861247Z","iopub.execute_input":"2023-11-10T09:30:33.862018Z","iopub.status.idle":"2023-11-10T09:30:33.867089Z","shell.execute_reply.started":"2023-11-10T09:30:33.861980Z","shell.execute_reply":"2023-11-10T09:30:33.865962Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\nfrom tensorflow import keras\nfrom tensorflow.keras import 
layers","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:37.499785Z","iopub.execute_input":"2023-11-10T09:30:37.500245Z","iopub.status.idle":"2023-11-10T09:30:47.102763Z","shell.execute_reply.started":"2023-11-10T09:30:37.500210Z","shell.execute_reply":"2023-11-10T09:30:47.101576Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import torch\nimport tensorflow as tf","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:30:58.131179Z","iopub.execute_input":"2023-11-10T09:30:58.132400Z","iopub.status.idle":"2023-11-10T09:30:58.137137Z","shell.execute_reply.started":"2023-11-10T09:30:58.132363Z","shell.execute_reply":"2023-11-10T09:30:58.135831Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:48:06.951287Z","iopub.execute_input":"2023-11-10T12:48:06.951798Z","iopub.status.idle":"2023-11-10T12:48:06.964177Z","shell.execute_reply.started":"2023-11-10T12:48:06.951762Z","shell.execute_reply":"2023-11-10T12:48:06.963129Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = 
input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T19:06:15.363530Z","iopub.execute_input":"2023-11-04T19:06:15.363928Z","iopub.status.idle":"2023-11-04T19:06:15.370822Z","shell.execute_reply.started":"2023-11-04T19:06:15.363899Z","shell.execute_reply":"2023-11-04T19:06:15.370022Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:48:13.577522Z","iopub.execute_input":"2023-11-10T12:48:13.578405Z","iopub.status.idle":"2023-11-10T12:48:30.750395Z","shell.execute_reply.started":"2023-11-10T12:48:13.578358Z","shell.execute_reply":"2023-11-10T12:48:30.748446Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2: two hidden layers (32 -> 16)\nmodel_nn2 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n#
Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:49:38.179621Z","iopub.execute_input":"2023-11-10T12:49:38.180531Z","iopub.status.idle":"2023-11-10T12:49:54.138309Z","shell.execute_reply.started":"2023-11-10T12:49:38.180492Z","shell.execute_reply":"2023-11-10T12:49:54.137178Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3: three hidden layers (32 -> 16 -> 8)\nmodel_nn3 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:50:11.276040Z","iopub.execute_input":"2023-11-10T12:50:11.276444Z","iopub.status.idle":"2023-11-10T12:50:27.282860Z","shell.execute_reply.started":"2023-11-10T12:50:11.276412Z","shell.execute_reply":"2023-11-10T12:50:27.281738Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4: four hidden layers (32 -> 16 -> 8 -> 4)\nmodel_nn4 = keras.Sequential([\n
layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:50:43.786298Z","iopub.execute_input":"2023-11-10T12:50:43.787648Z","iopub.status.idle":"2023-11-10T12:50:59.770791Z","shell.execute_reply.started":"2023-11-10T12:50:43.787600Z","shell.execute_reply":"2023-11-10T12:50:59.769100Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5: five hidden layers (32 -> 16 -> 8 -> 4 -> 2)\nmodel_nn5 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(2, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5
Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:51:17.006100Z","iopub.execute_input":"2023-11-10T12:51:17.007012Z","iopub.status.idle":"2023-11-10T12:51:32.766563Z","shell.execute_reply.started":"2023-11-10T12:51:17.006969Z","shell.execute_reply":"2023-11-10T12:51:32.765368Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 5-Factor Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB', 'HML','rm-rf', 'CMA', 'RMW']]\ny = data['rt']","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:30:58.105039Z","iopub.execute_input":"2023-11-04T20:30:58.105547Z","iopub.status.idle":"2023-11-04T20:30:58.113001Z","shell.execute_reply.started":"2023-11-04T20:30:58.105509Z","shell.execute_reply":"2023-11-04T20:30:58.111862Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:31:43.035131Z","iopub.execute_input":"2023-11-04T20:31:43.035633Z","iopub.status.idle":"2023-11-04T20:31:43.047528Z","shell.execute_reply.started":"2023-11-04T20:31:43.035594Z","shell.execute_reply":"2023-11-04T20:31:43.046451Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# RF","metadata":{}},{"cell_type":"code","source":"# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on
the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:33:21.215586Z","iopub.execute_input":"2023-11-04T20:33:21.216010Z","iopub.status.idle":"2023-11-04T20:33:21.844784Z","shell.execute_reply.started":"2023-11-04T20:33:21.215979Z","shell.execute_reply":"2023-11-04T20:33:21.843976Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:33:52.950706Z","iopub.execute_input":"2023-11-04T20:33:52.951131Z","iopub.status.idle":"2023-11-04T20:33:52.981738Z","shell.execute_reply.started":"2023-11-04T20:33:52.951099Z","shell.execute_reply":"2023-11-04T20:33:52.980638Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# GBRT","metadata":{}},{"cell_type":"code","source":"# Create a Gradient Boosting Regressor model\ngbrt_model = 
GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:35:08.256350Z","iopub.execute_input":"2023-11-04T20:35:08.256966Z","iopub.status.idle":"2023-11-04T20:35:08.564673Z","shell.execute_reply.started":"2023-11-04T20:35:08.256902Z","shell.execute_reply":"2023-11-04T20:35:08.563492Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:36:11.211184Z","iopub.execute_input":"2023-11-04T20:36:11.211635Z","iopub.status.idle":"2023-11-04T20:36:11.225762Z","shell.execute_reply.started":"2023-11-04T20:36:11.211598Z","shell.execute_reply":"2023-11-04T20:36:11.224421Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# XGB","metadata":{}},{"cell_type":"code","source":"# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:38:36.416817Z","iopub.execute_input":"2023-11-04T20:38:36.417290Z","iopub.status.idle":"2023-11-04T20:38:36.668877Z","shell.execute_reply.started":"2023-11-04T20:38:36.417252Z","shell.execute_reply":"2023-11-04T20:38:36.667705Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:39:43.017843Z","iopub.execute_input":"2023-11-04T20:39:43.018350Z","iopub.status.idle":"2023-11-04T20:39:43.035843Z","shell.execute_reply.started":"2023-11-04T20:39:43.018307Z","shell.execute_reply":"2023-11-04T20:39:43.035025Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# SVM","metadata":{}},{"cell_type":"code","source":"# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:41:20.388879Z","iopub.execute_input":"2023-11-04T20:41:20.389351Z","iopub.status.idle":"2023-11-04T20:41:22.246592Z","shell.execute_reply.started":"2023-11-04T20:41:20.389309Z","shell.execute_reply":"2023-11-04T20:41:22.245521Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = 
mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:41:58.017364Z","iopub.execute_input":"2023-11-04T20:41:58.017809Z","iopub.status.idle":"2023-11-04T20:41:58.110032Z","shell.execute_reply.started":"2023-11-04T20:41:58.017772Z","shell.execute_reply":"2023-11-04T20:41:58.108779Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Neural Networks","metadata":{}},{"cell_type":"code","source":"# Standardize your input data\ninput_scaler = StandardScaler()\nX_train = input_scaler.fit_transform(X_train)\nX_val = input_scaler.transform(X_val)\nX_test = input_scaler.transform(X_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:44:11.741377Z","iopub.execute_input":"2023-11-04T20:44:11.741830Z","iopub.status.idle":"2023-11-04T20:44:11.755907Z","shell.execute_reply.started":"2023-11-04T20:44:11.741799Z","shell.execute_reply":"2023-11-04T20:44:11.754681Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\",
mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:49:17.322766Z","iopub.execute_input":"2023-11-04T20:49:17.323234Z","iopub.status.idle":"2023-11-04T20:49:39.065326Z","shell.execute_reply.started":"2023-11-04T20:49:17.323197Z","shell.execute_reply":"2023-11-04T20:49:39.064517Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2: two hidden layers (32 -> 16)\nmodel_nn2 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:50:02.348166Z","iopub.execute_input":"2023-11-04T20:50:02.349215Z","iopub.status.idle":"2023-11-04T20:50:19.975070Z","shell.execute_reply.started":"2023-11-04T20:50:02.349177Z","shell.execute_reply":"2023-11-04T20:50:19.973024Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3: three hidden layers (32 -> 16 -> 8)\nmodel_nn3 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation
set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T20:50:59.415307Z","iopub.execute_input":"2023-11-04T20:50:59.416065Z","iopub.status.idle":"2023-11-04T20:51:16.712927Z","shell.execute_reply.started":"2023-11-04T20:50:59.416016Z","shell.execute_reply":"2023-11-04T20:51:16.711697Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4: four hidden layers (32 -> 16 -> 8 -> 4)\nmodel_nn4 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\",
mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:14:57.990913Z","iopub.execute_input":"2023-11-04T21:14:57.991366Z","iopub.status.idle":"2023-11-04T21:15:15.450466Z","shell.execute_reply.started":"2023-11-04T21:14:57.991331Z","shell.execute_reply":"2023-11-04T21:15:15.449286Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5: five hidden layers (32 -> 16 -> 8 -> 4 -> 2)\nmodel_nn5 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(2, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\",
mae_nn5)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:16:39.776639Z","iopub.execute_input":"2023-11-04T21:16:39.777054Z","iopub.status.idle":"2023-11-04T21:16:57.196617Z","shell.execute_reply.started":"2023-11-04T21:16:39.777020Z","shell.execute_reply":"2023-11-04T21:16:57.194278Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 6 Factor Model","metadata":{}},{"cell_type":"code","source":"# Define independent variables (features) and dependent variable\nX = data[['SMB', 'HML','rm-rf','UMD', 'CMA', 'RMW']]\ny = data['rt']\n# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:30.545455Z","iopub.execute_input":"2023-11-04T21:30:30.545904Z","iopub.status.idle":"2023-11-04T21:30:30.559247Z","shell.execute_reply.started":"2023-11-04T21:30:30.545874Z","shell.execute_reply":"2023-11-04T21:30:30.558164Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE):
{val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:41.910401Z","iopub.execute_input":"2023-11-04T21:30:41.910898Z","iopub.status.idle":"2023-11-04T21:30:42.649531Z","shell.execute_reply.started":"2023-11-04T21:30:41.910859Z","shell.execute_reply":"2023-11-04T21:30:42.648386Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, 
y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:30:55.932648Z","iopub.execute_input":"2023-11-04T21:30:55.933097Z","iopub.status.idle":"2023-11-04T21:30:56.288139Z","shell.execute_reply.started":"2023-11-04T21:30:55.933062Z","shell.execute_reply":"2023-11-04T21:30:56.286944Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:25.658673Z","iopub.execute_input":"2023-11-04T21:31:25.659129Z","iopub.status.idle":"2023-11-04T21:31:25.918903Z","shell.execute_reply.started":"2023-11-04T21:31:25.659097Z","shell.execute_reply":"2023-11-04T21:31:25.917985Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:42.411461Z","iopub.execute_input":"2023-11-04T21:31:42.412426Z","iopub.status.idle":"2023-11-04T21:31:44.932214Z","shell.execute_reply.started":"2023-11-04T21:31:42.412391Z","shell.execute_reply":"2023-11-04T21:31:44.930952Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:31:54.291349Z","iopub.execute_input":"2023-11-04T21:31:54.291765Z","iopub.status.idle":"2023-11-04T21:32:12.368425Z","shell.execute_reply.started":"2023-11-04T21:31:54.291735Z","shell.execute_reply":"2023-11-04T21:32:12.367160Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2 (the network defined in this cell)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, 
and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:32:50.574707Z","iopub.execute_input":"2023-11-04T21:32:50.575127Z","iopub.status.idle":"2023-11-04T21:33:08.021760Z","shell.execute_reply.started":"2023-11-04T21:32:50.575093Z","shell.execute_reply":"2023-11-04T21:33:08.020600Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3 (the network defined in this cell)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:33:17.269084Z","iopub.execute_input":"2023-11-04T21:33:17.269496Z","iopub.status.idle":"2023-11-04T21:33:34.531209Z","shell.execute_reply.started":"2023-11-04T21:33:17.269461Z","shell.execute_reply":"2023-11-04T21:33:34.530095Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = keras.Sequential([\n 
layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4 (the network defined in this cell)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:33:45.626823Z","iopub.execute_input":"2023-11-04T21:33:45.627226Z","iopub.status.idle":"2023-11-04T21:34:06.350248Z","shell.execute_reply.started":"2023-11-04T21:33:45.627196Z","shell.execute_reply":"2023-11-04T21:34:06.348748Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(2, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5 (the network defined in this cell)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 
Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:34:16.582416Z","iopub.execute_input":"2023-11-04T21:34:16.582865Z","iopub.status.idle":"2023-11-04T21:34:33.932808Z","shell.execute_reply.started":"2023-11-04T21:34:16.582823Z","shell.execute_reply":"2023-11-04T21:34:33.931637Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Q-Factor Model","metadata":{}},{"cell_type":"code","source":"#q factor\n# Define independent variables (features) and dependent variable\nX = data[['rm-rf','rmf','rME', 'IA', 'ROE']]\ny = data['rt']\n# Hold out 15% of the data for testing, then 15% of the remaining 85% for validation\n# (roughly 72% training / 13% validation / 15% testing overall)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:35:53.541197Z","iopub.execute_input":"2023-11-10T09:35:53.541658Z","iopub.status.idle":"2023-11-10T09:35:53.558176Z","shell.execute_reply.started":"2023-11-10T09:35:53.541625Z","shell.execute_reply":"2023-11-10T09:35:53.556953Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): 
{val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:01.634076Z","iopub.execute_input":"2023-11-10T09:36:01.634464Z","iopub.status.idle":"2023-11-10T09:36:02.308860Z","shell.execute_reply.started":"2023-11-10T09:36:01.634436Z","shell.execute_reply":"2023-11-10T09:36:02.307607Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = 
r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:10.759137Z","iopub.execute_input":"2023-11-10T09:36:10.759536Z","iopub.status.idle":"2023-11-10T09:36:11.093456Z","shell.execute_reply.started":"2023-11-10T09:36:10.759506Z","shell.execute_reply":"2023-11-10T09:36:11.092178Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:19.407726Z","iopub.execute_input":"2023-11-10T09:36:19.409109Z","iopub.status.idle":"2023-11-10T09:36:19.753506Z","shell.execute_reply.started":"2023-11-10T09:36:19.409060Z","shell.execute_reply":"2023-11-10T09:36:19.752364Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:36:27.708452Z","iopub.execute_input":"2023-11-10T09:36:27.708853Z","iopub.status.idle":"2023-11-10T09:36:29.645863Z","shell.execute_reply.started":"2023-11-10T09:36:27.708822Z","shell.execute_reply":"2023-11-10T09:36:29.644606Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:37:10.614772Z","iopub.execute_input":"2023-11-10T09:37:10.615250Z","iopub.status.idle":"2023-11-10T09:37:29.666755Z","shell.execute_reply.started":"2023-11-10T09:37:10.615213Z","shell.execute_reply":"2023-11-10T09:37:29.665411Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2 (the network defined in this cell)\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, 
and MAE for NN2\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:37:47.686931Z","iopub.execute_input":"2023-11-10T09:37:47.687353Z","iopub.status.idle":"2023-11-10T09:38:05.828271Z","shell.execute_reply.started":"2023-11-10T09:37:47.687316Z","shell.execute_reply":"2023-11-10T09:38:05.826961Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3\nmodel_nn3 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3 (the network defined in this cell)\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:38:16.148196Z","iopub.execute_input":"2023-11-10T09:38:16.148601Z","iopub.status.idle":"2023-11-10T09:38:33.864632Z","shell.execute_reply.started":"2023-11-10T09:38:16.148570Z","shell.execute_reply":"2023-11-10T09:38:33.863464Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4\nmodel_nn4 = keras.Sequential([\n 
layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4 (the network defined in this cell)\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n","metadata":{"execution":{"iopub.status.busy":"2023-11-10T09:38:51.473183Z","iopub.execute_input":"2023-11-10T09:38:51.474198Z","iopub.status.idle":"2023-11-10T09:39:12.194945Z","shell.execute_reply.started":"2023-11-10T09:38:51.474151Z","shell.execute_reply":"2023-11-10T09:39:12.193652Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5\nmodel_nn5 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(2, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5 (the network defined in this cell)\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 
Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T21:49:07.450901Z","iopub.execute_input":"2023-11-04T21:49:07.451360Z","iopub.status.idle":"2023-11-04T21:49:24.680952Z","shell.execute_reply.started":"2023-11-04T21:49:07.451323Z","shell.execute_reply":"2023-11-04T21:49:24.679794Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# #q factor 2","metadata":{}},{"cell_type":"code","source":"#q factor 2\n# Define independent variables (features) and dependent variable\nX = data[['rm-rf','rmf','rME', 'IA', 'ROE', 'ROEG']]\ny = data['rt']\n# Hold out 15% of the data for testing, then 15% of the remaining 85% for validation\n# (roughly 72% training / 13% validation / 15% testing overall)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:40:10.890094Z","iopub.execute_input":"2023-11-10T12:40:10.891309Z","iopub.status.idle":"2023-11-10T12:40:10.903034Z","shell.execute_reply.started":"2023-11-10T12:40:10.891243Z","shell.execute_reply":"2023-11-10T12:40:10.901766Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#Random Forest\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=50)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = rf_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): 
{val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = rf_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:40:18.126074Z","iopub.execute_input":"2023-11-10T12:40:18.126515Z","iopub.status.idle":"2023-11-10T12:40:18.828497Z","shell.execute_reply.started":"2023-11-10T12:40:18.126473Z","shell.execute_reply":"2023-11-10T12:40:18.827318Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = gbrt_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = gbrt_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = 
r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:09:37.813602Z","iopub.execute_input":"2023-11-04T22:09:37.814012Z","iopub.status.idle":"2023-11-04T22:09:38.180533Z","shell.execute_reply.started":"2023-11-04T22:09:37.813980Z","shell.execute_reply":"2023-11-04T22:09:38.179174Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=120, random_state=57)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Make predictions on the validation set\ny_val_pred = xgb_model.predict(X_val)\n\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = xgb_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): 
{test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): {test_mae:.4f}\")","metadata":{"execution":{"iopub.status.busy":"2023-11-10T12:41:47.635545Z","iopub.execute_input":"2023-11-10T12:41:47.635979Z","iopub.status.idle":"2023-11-10T12:41:47.922038Z","shell.execute_reply.started":"2023-11-10T12:41:47.635944Z","shell.execute_reply":"2023-11-10T12:41:47.920917Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n\n# Make predictions on the validation set\ny_val_pred = svr_model.predict(X_val)\n# Calculate validation R-squared (R2)\nval_r2 = r2_score(y_val, y_val_pred)\n\n# Calculate validation Root Mean Squared Error (RMSE)\nval_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))\n\n# Calculate validation Mean Absolute Error (MAE)\nval_mae = mean_absolute_error(y_val, y_val_pred)\n\nprint(f\"Validation R-squared (R2): {val_r2:.4f}\")\nprint(f\"Validation Root Mean Squared Error (RMSE): {val_rmse:.4f}\")\nprint(f\"Validation Mean Absolute Error (MAE): {val_mae:.4f}\")\n# Now, evaluate the model on the testing set\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate testing R-squared (R2)\ntest_r2 = r2_score(y_test, y_test_pred)\n\n# Calculate testing Root Mean Squared Error (RMSE)\ntest_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))\n\n# Calculate testing Mean Absolute Error (MAE)\ntest_mae = mean_absolute_error(y_test, y_test_pred)\n\nprint(f\"Testing R-squared (R2): {test_r2:.4f}\")\nprint(f\"Testing Root Mean Squared Error (RMSE): {test_rmse:.4f}\")\nprint(f\"Testing Mean Absolute Error (MAE): 
{test_mae:.4f}\")\n","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:00.656822Z","iopub.execute_input":"2023-11-04T22:10:00.658100Z","iopub.status.idle":"2023-11-04T22:10:02.709745Z","shell.execute_reply.started":"2023-11-04T22:10:00.658060Z","shell.execute_reply":"2023-11-04T22:10:02.708539Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN1\nmodel_nn1 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn1.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN1\nmodel_nn1.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set\ny_val_pred_nn1 = model_nn1.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN1\nr2_nn1 = r2_score(y_val, y_val_pred_nn1)\nrmse_nn1 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn1))\nmae_nn1 = mean_absolute_error(y_val, y_val_pred_nn1)\n\nprint(\"NN1 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1)\nprint(\"Mean Absolute Error (MAE):\", mae_nn1)\n\n# Make predictions on the test set for NN1\ny_test_pred_nn1 = model_nn1.predict(X_test)\n# Calculate R2, RMSE, and MAE for NN1 on the test data\nr2_nn1_test = r2_score(y_test, y_test_pred_nn1)\nrmse_nn1_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn1))\nmae_nn1_test = mean_absolute_error(y_test, y_test_pred_nn1)\n\nprint(\"NN1 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn1_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn1_test)\nprint(\"Mean Absolute Error (MAE):\", 
mae_nn1_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:14.635019Z","iopub.execute_input":"2023-11-04T22:10:14.635424Z","iopub.status.idle":"2023-11-04T22:10:33.867096Z","shell.execute_reply.started":"2023-11-04T22:10:14.635391Z","shell.execute_reply":"2023-11-04T22:10:33.865925Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from tensorflow.keras.utils import plot_model\n# Create a diagram of the model and save it as an image file\nplot_model(model_nn1, to_file='model_nn1.png', show_shapes=True, show_layer_names=True)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:15:17.133029Z","iopub.execute_input":"2023-11-04T22:15:17.133710Z","iopub.status.idle":"2023-11-04T22:15:17.201557Z","shell.execute_reply.started":"2023-11-04T22:15:17.133675Z","shell.execute_reply":"2023-11-04T22:15:17.200697Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pydot\nfrom keras.models import Sequential\nfrom keras.layers import Dense\nfrom keras.utils import plot_model","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:20:53.933509Z","iopub.execute_input":"2023-11-04T22:20:53.933914Z","iopub.status.idle":"2023-11-04T22:20:53.939468Z","shell.execute_reply.started":"2023-11-04T22:20:53.933884Z","shell.execute_reply":"2023-11-04T22:20:53.938281Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"pip install ann_visualizer","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:26:30.483273Z","iopub.execute_input":"2023-11-04T22:26:30.483684Z","iopub.status.idle":"2023-11-04T22:26:46.783985Z","shell.execute_reply.started":"2023-11-04T22:26:30.483652Z","shell.execute_reply":"2023-11-04T22:26:46.782625Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN2\nmodel_nn2 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, 
activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn2.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN2 on its own model object so its metrics reflect the two-hidden-layer network\nmodel_nn2.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set with NN2\ny_val_pred_nn2 = model_nn2.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN2 on the validation data\nr2_nn2 = r2_score(y_val, y_val_pred_nn2)\nrmse_nn2 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn2))\nmae_nn2 = mean_absolute_error(y_val, y_val_pred_nn2)\n\nprint(\"NN2 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2)\n\n# Make predictions on the test set for NN2\ny_test_pred_nn2 = model_nn2.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN2 on the test data\nr2_nn2_test = r2_score(y_test, y_test_pred_nn2)\nrmse_nn2_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn2))\nmae_nn2_test = mean_absolute_error(y_test, y_test_pred_nn2)\n\nprint(\"NN2 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn2_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn2_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn2_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:10:45.938901Z","iopub.execute_input":"2023-11-04T22:10:45.939396Z","iopub.status.idle":"2023-11-04T22:11:04.625656Z","shell.execute_reply.started":"2023-11-04T22:10:45.939358Z","shell.execute_reply":"2023-11-04T22:11:04.624499Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN3: three hidden layers (32-16-8)\nmodel_nn3 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn3.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN3 on its own model object\nmodel_nn3.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation 
set with NN3\ny_val_pred_nn3 = model_nn3.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN3 on the validation data\nr2_nn3 = r2_score(y_val, y_val_pred_nn3)\nrmse_nn3 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn3))\nmae_nn3 = mean_absolute_error(y_val, y_val_pred_nn3)\n\nprint(\"NN3 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3)\n\n# Make predictions on the test set for NN3\ny_test_pred_nn3 = model_nn3.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN3 on the test data\nr2_nn3_test = r2_score(y_test, y_test_pred_nn3)\nrmse_nn3_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn3))\nmae_nn3_test = mean_absolute_error(y_test, y_test_pred_nn3)\n\nprint(\"NN3 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn3_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn3_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn3_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:11:16.299197Z","iopub.execute_input":"2023-11-04T22:11:16.299658Z","iopub.status.idle":"2023-11-04T22:11:34.444833Z","shell.execute_reply.started":"2023-11-04T22:11:16.299622Z","shell.execute_reply":"2023-11-04T22:11:34.443635Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN4: four hidden layers (32-16-8-4)\nmodel_nn4 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn4.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN4 on its own model object\nmodel_nn4.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set with NN4\ny_val_pred_nn4 = model_nn4.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN4 on the validation data\nr2_nn4 = r2_score(y_val, y_val_pred_nn4)\nrmse_nn4 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn4))\nmae_nn4 = mean_absolute_error(y_val, 
y_val_pred_nn4)\n\nprint(\"NN4 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4)\n\n# Make predictions on the test set for NN4\ny_test_pred_nn4 = model_nn4.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN4 on the test data\nr2_nn4_test = r2_score(y_test, y_test_pred_nn4)\nrmse_nn4_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn4))\nmae_nn4_test = mean_absolute_error(y_test, y_test_pred_nn4)\n\nprint(\"NN4 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn4_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn4_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn4_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:11:45.900922Z","iopub.execute_input":"2023-11-04T22:11:45.901370Z","iopub.status.idle":"2023-11-04T22:12:04.427192Z","shell.execute_reply.started":"2023-11-04T22:11:45.901334Z","shell.execute_reply":"2023-11-04T22:12:04.426314Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#NN5: five hidden layers (32-16-8-4-2)\nmodel_nn5 = keras.Sequential([\n layers.Input(shape=(X_train.shape[1],)),\n layers.Dense(32, activation='relu'),\n layers.Dense(16, activation='relu'),\n layers.Dense(8, activation='relu'),\n layers.Dense(4, activation='relu'),\n layers.Dense(2, activation='relu'),\n layers.Dense(1) # For regression tasks\n])\n\nmodel_nn5.compile(optimizer='adam', loss='mean_squared_error')\n# Train NN5 on its own model object\nmodel_nn5.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)\n\n# Make predictions on the validation set with NN5\ny_val_pred_nn5 = model_nn5.predict(X_val)\n\n# Calculate R2, RMSE, and MAE for NN5 on the validation data\nr2_nn5 = r2_score(y_val, y_val_pred_nn5)\nrmse_nn5 = np.sqrt(mean_squared_error(y_val, y_val_pred_nn5))\nmae_nn5 = mean_absolute_error(y_val, y_val_pred_nn5)\n\nprint(\"NN5 Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5)\n\n# Make 
predictions on the test set for NN5\ny_test_pred_nn5 = model_nn5.predict(X_test)\n\n# Calculate R2, RMSE, and MAE for NN5 on the test data\nr2_nn5_test = r2_score(y_test, y_test_pred_nn5)\nrmse_nn5_test = np.sqrt(mean_squared_error(y_test, y_test_pred_nn5))\nmae_nn5_test = mean_absolute_error(y_test, y_test_pred_nn5)\n\nprint(\"NN5 Test Metrics:\")\nprint(\"R-squared (R2):\", r2_nn5_test)\nprint(\"Root Mean Squared Error (RMSE):\", rmse_nn5_test)\nprint(\"Mean Absolute Error (MAE):\", mae_nn5_test)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T22:12:16.183984Z","iopub.execute_input":"2023-11-04T22:12:16.185053Z","iopub.status.idle":"2023-11-04T22:12:34.721042Z","shell.execute_reply.started":"2023-11-04T22:12:16.185016Z","shell.execute_reply":"2023-11-04T22:12:34.720061Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Correlation","metadata":{}},{"cell_type":"code","source":"# Predict on the training set\ny_train_pred = svr_model.predict(X_train)\n\n# Calculate the correlation between predicted and actual values in the training set\nin_sample_correlation = np.corrcoef(y_train, y_train_pred)[0, 1]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:42:25.176562Z","iopub.execute_input":"2023-11-04T18:42:25.176994Z","iopub.status.idle":"2023-11-04T18:42:25.552089Z","shell.execute_reply.started":"2023-11-04T18:42:25.176960Z","shell.execute_reply":"2023-11-04T18:42:25.551070Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Recompute the SVR test-set predictions so this cell pairs with the in-sample cell above\n# (both use svr_model) instead of relying on whichever cell last assigned y_test_pred\ny_test_pred = svr_model.predict(X_test)\n\n# Calculate the correlation between predicted and actual values in the test set (out-of-sample)\nout_of_sample_correlation = np.corrcoef(y_test, y_test_pred)[0, 
1]","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:44:25.712299Z","iopub.execute_input":"2023-11-04T18:44:25.712712Z","iopub.status.idle":"2023-11-04T18:44:25.718603Z","shell.execute_reply.started":"2023-11-04T18:44:25.712679Z","shell.execute_reply":"2023-11-04T18:44:25.717726Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Print both correlations\nprint(\"In-Sample Correlation:\", in_sample_correlation)\nprint(\"Out-of-Sample Correlation:\", out_of_sample_correlation)","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:44:29.978779Z","iopub.execute_input":"2023-11-04T18:44:29.979346Z","iopub.status.idle":"2023-11-04T18:44:29.986222Z","shell.execute_reply.started":"2023-11-04T18:44:29.979292Z","shell.execute_reply":"2023-11-04T18:44:29.985120Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a bar chart for in-sample and out-of-sample correlations\n# matplotlib is imported in this cell because the only pyplot import visible nearby\n# sits in a later cell, so plt may be undefined here on a fresh top-to-bottom run\nimport matplotlib.pyplot as plt\n\ncorrelations = [in_sample_correlation, out_of_sample_correlation]\nlabels = ['In-Sample', 'Out-of-Sample']\nx = np.arange(len(labels))\n# Start a fresh figure with one full-width axes; subplot(1, 2, 2) would draw the bars in the\n# right half of a two-column grid and leave the left half 
empty\nplt.figure()\nplt.bar(x, correlations, color=['blue', 'green'])\nplt.xticks(x, labels)\nplt.ylabel('Correlation Coefficient')\nplt.title('In-Sample vs. Out-of-Sample Correlations')\n\nplt.tight_layout() # Adjust layout for readability\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-04T18:47:30.085857Z","iopub.execute_input":"2023-11-04T18:47:30.086253Z","iopub.status.idle":"2023-11-04T18:47:30.424591Z","shell.execute_reply.started":"2023-11-04T18:47:30.086223Z","shell.execute_reply":"2023-11-04T18:47:30.423276Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Descriptive Statistics","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nimport seaborn as sns","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:01:55.397824Z","iopub.execute_input":"2023-11-09T13:01:55.398181Z","iopub.status.idle":"2023-11-09T13:01:55.740078Z","shell.execute_reply.started":"2023-11-09T13:01:55.398155Z","shell.execute_reply":"2023-11-09T13:01:55.738559Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data.head()","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:15.348406Z","iopub.execute_input":"2023-11-09T13:02:15.348795Z","iopub.status.idle":"2023-11-09T13:02:15.370575Z","shell.execute_reply.started":"2023-11-09T13:02:15.348765Z","shell.execute_reply":"2023-11-09T13:02:15.369277Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a new variable 'rm-rf' by subtracting 'rf' from 'rm'\ndata['rm-rf'] = data['rm'] - data['rf']","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:18.988633Z","iopub.execute_input":"2023-11-09T13:02:18.989231Z","iopub.status.idle":"2023-11-09T13:02:18.993605Z","shell.execute_reply.started":"2023-11-09T13:02:18.989202Z","shell.execute_reply":"2023-11-09T13:02:18.992773Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Select the variables you want to include in the correlation matrix\nfactors = ['rm-rf', 
'SMB','HML','CMA','UMD','RMW','rME','IA','ROE','ROEG']","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:25.003477Z","iopub.execute_input":"2023-11-09T13:02:25.003848Z","iopub.status.idle":"2023-11-09T13:02:25.009219Z","shell.execute_reply.started":"2023-11-09T13:02:25.003820Z","shell.execute_reply":"2023-11-09T13:02:25.007656Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Calculate the correlation matrix for the selected variables\ncorr_matrix = data[factors].corr()\ncorr_matrix","metadata":{"execution":{"iopub.status.busy":"2023-11-09T13:02:35.653707Z","iopub.execute_input":"2023-11-09T13:02:35.654069Z","iopub.status.idle":"2023-11-09T13:02:35.680366Z","shell.execute_reply.started":"2023-11-09T13:02:35.654046Z","shell.execute_reply":"2023-11-09T13:02:35.679318Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Create a heatmap to visualize the correlation matrix\nplt.figure(figsize=(8, 6))\nsns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)\nplt.title('Correlation Plot for Selected Factors', pad=20) # Adjust the 'pad' parameter to control the distance\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-08T20:10:22.186529Z","iopub.execute_input":"2023-11-08T20:10:22.188010Z","iopub.status.idle":"2023-11-08T20:10:22.882816Z","shell.execute_reply.started":"2023-11-08T20:10:22.187937Z","shell.execute_reply":"2023-11-08T20:10:22.881073Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# plots","metadata":{}},{"cell_type":"code","source":"#q factor 2\n# Define independent variables (features) and dependent variable\nX = data[['rm-rf','rmf','rME', 'IA', 'ROE', 'ROEG']]\ny = data['rt']\n# Split the data into training (70%), validation (15%), and testing (15%) sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\nX_train, X_val, y_train, y_val = 
train_test_split(X_train, y_train, test_size=0.15, random_state=42)\n\n# Create a Random Forest Regressor model\nrf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\nrf_model.fit(X_train, y_train)\n# Now, evaluate the model on the testing set\ny_test_pred_rf = rf_model.predict(X_test)\n###GBRT\n# Create a Gradient Boosting Regressor model\ngbrt_model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n# Fit the model to the training data\ngbrt_model.fit(X_train, y_train)\n# Now, evaluate the model on the testing set\ny_test_pred_gbrt = gbrt_model.predict(X_test)\n###XGB\n# Create an XGBRegressor model\nxgb_model = XGBRegressor(n_estimators=100, random_state=42)\n\n# Fit the model to the training data\nxgb_model.fit(X_train, y_train)\n# Now, evaluate the model on the testing set\ny_test_pred_xgb = xgb_model.predict(X_test)\n###SVM\n# Create an SVR model\nsvr_model = SVR(kernel='linear', C=1.0)\n# Fit the model to the training data\nsvr_model.fit(X_train, y_train)\n# Now, evaluate the model on the testing set\ny_test_pred_svr = svr_model.predict(X_test)\n","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Compare the four fitted models: one predicted-vs-actual scatter panel each on a 2x2 grid\nplt.figure(figsize=(10, 8))\n# Set the main title for the entire figure\nplt.suptitle('Comparison of Q-Factors Model Predictions',fontsize=16, y=1.02)\n\n# Panel title and test-set predictions for each model, listed in 
drawing order\nscatter_panels = [\n    ('RF Testing: Predicted vs. Actual Returns', y_test_pred_rf),\n    ('GBRT Testing: Predicted vs. Actual Returns', y_test_pred_gbrt),\n    ('XGBoost Testing: Predicted vs. Actual Returns', y_test_pred_xgb),\n    ('SVM Testing: Predicted vs. Actual Returns', y_test_pred_svr),\n]\n\n# Observed return range, computed once; it anchors the dashed y = x reference line in every panel\nreturn_range = [min(y_test), max(y_test)]\n\nfor panel_idx, (panel_title, panel_preds) in enumerate(scatter_panels, start=1):\n    plt.subplot(2, 2, panel_idx)\n    plt.scatter(y_test, panel_preds, alpha=0.5)\n    # Points on the red dashed line are perfect predictions\n    plt.plot(return_range, return_range, color='red', linestyle='--')\n    plt.title(panel_title)\n    plt.xlabel('Returns')\n    plt.ylabel('Predicted Returns')\n\nplt.tight_layout()\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2023-11-09T14:11:48.736716Z","iopub.execute_input":"2023-11-09T14:11:48.737086Z","iopub.status.idle":"2023-11-09T14:11:49.524759Z","shell.execute_reply.started":"2023-11-09T14:11:48.737058Z","shell.execute_reply":"2023-11-09T14:11:49.523323Z"},"trusted":true},"execution_count":null,"outputs":[]}]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment