firmai/quant-rating-calculation.ipynb

## quant-rating-calculation.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Quant Rating Calculation.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "mount_file_id": "1L8FdqEzLSa9wqNfq4qVQKSDV9HvaCc6_",
      "authorship_tag": "ABX9TyMxusMOXSGLfDWdYsazgnLE",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/firmai/f16261734e1c2e4cbacc08ecf453de63/quant-rating-calculation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "yuR7tYBvoxTp"
      },
      "source": [
        "## Quant Rating Calculation\n",
        "\n",
        "`Overall = Reputation Score 33% + Remuneration Score 33% + Quality Score 33% `"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "askumoNVJ0bl"
      },
      "source": [
        "import pandas as pd\n",
        "\n",
        "pre_l = \"https://github.com/firmai/random-assets-two/blob/master/quantrating/\"\n",
        "\n",
        "\n",
        "df_tfe = pd.read_csv(pre_l +\"data_tfetimes.csv?raw=true\")\n",
        "df_quant = pd.read_csv(pre_l +\"data_quantnet.csv?raw=true\")\n",
        "df_risk = pd.read_csv(pre_l +\"data_risk.csv?raw=true\")\n",
        "df_det = pd.read_excel(pre_l + \"risk_quantitative_finance.xlsx?raw=true\")\n",
        "df_risk_two = pd.read_csv(pre_l +\"data_risk2.csv?raw=true\")\n",
        "df_rank = pd.read_csv(pre_l +\"uni_rank.csv?raw=true\").set_index(\"University/institution\")\n",
        "\n",
        "\n",
        "df_det[\"Banking\"] = df_det[\"Graduates employed in\"].str.split(\"Banking: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det[\"Asset Management\"] = df_det[\"Graduates employed in\"].str.split(\"asset management: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det[\"Consulting\"] = df_det[\"Graduates employed in\"].str.split(\"consulting: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det[\"Fintech\"] = df_det[\"Graduates employed in\"].str.split(\"fintech: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det[\"Academia\"] = df_det[\"Graduates employed in\"].str.split(\"academia: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det[\"Other\"] = df_det[\"Graduates employed in\"].str.split(\"other: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
        "df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") &  (df_det[\"sheetnames\"]==\"Master’s in Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\"  \n",
        "df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") &  (df_det[\"sheetnames\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
        "df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") &  (df_det[\"sheetnames\"]==\"MSc in Mathematical Finance\")].index.values[0], 1] = \"University of York (Mathematical Finance)\"  \n",
        "df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") &  (df_det[\"sheetnames\"]==\"MSc in Financial Engineering\")].index.values[0], 1] = \"University of York (Quantitative Finance)\" \n",
        "df_det = df_det.set_index(\"Name\")\n",
        "df_det[\"Most-cited lecturers\"] = df_det[\"Most-cited lecturers\"].replace(',','', regex=True)\n",
        "df_det[\"Average Citations\"] = df_det[\"Most-cited lecturers\"].str.extractall('(\\d+)').unstack().loc[:,0].astype(float).mean(axis=1,skipna=True)\n",
        "# both have 96 in the fall\n",
        "df_det.loc[\"University of California, Berkeley (Haas School of Business)\",\"Full-time students\"] = 96\n",
        "df_det.loc[\"New York University (Tandon School of Engineering)\",\"Full-time students\"] = 96\n",
        "df_det[\"Staff to Students\"]  = df_det[\"Teaching staff\"] / df_det[\"Full-time students\"]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UF30mEvWN5Jd"
      },
      "source": [
        "import numpy as np\n",
        "df_quant = df_quant.replace({\"-\":np.nan})\n",
        "def ser_perce(ser):\n",
        "  ser = ser.str.rstrip('%')\n",
        "  ser = ser.fillna(ser.median()).astype('float') / 100.0\n",
        "  return ser\n",
        "\n",
        "df_quant['emplm_rate_grad']  = ser_perce(df_quant['emplm_rate_grad'] )\n",
        "df_quant['emplm_rate_3_month_aft'] = ser_perce(df_quant['emplm_rate_3_month_aft'] )\n",
        "df_quant['avr_start_salary_bonus'] = df_quant['avr_start_salary_bonus'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
        "df_quant['tuition'] = df_quant['tuition'].str.split().str[0]\n",
        "df_quant['tuition'] = df_quant['tuition'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
        "\n",
        "df_quant['cohort_size'] = df_quant['cohort_size'].str.split().str[0].astype(int)\n",
        "\n",
        "df_quant = df_quant.fillna(df_quant.median())\n",
        "voila = df_quant[[\"peer_assessment\",\"emplm_rate_grad\",\"emplm_rate_3_month_aft\",\"avr_start_salary_bonus\",\"avr_gre_quant\",\"tuition\",\"cohort_size\"]]\n",
        "voila = (voila-voila.min())/(voila.max()-voila.min())\n",
        "voila = voila * (1 - 0) + 0\n",
        "\n",
        "vals =  voila[\"emplm_rate_grad\"] * 0.10 + voila[\"emplm_rate_3_month_aft\"] * 0.05 + voila[\"avr_start_salary_bonus\"] * 0.25 + voila[\"avr_start_salary_bonus\"] *  0.15"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rNNXkfEZXKsZ"
      },
      "source": [
        "reppy = {\"Boston University\":\"Boston University (Questrom School of Business)\", \"NYU Tandon School of Engineering\":\"New York University (Tandon School of Engineering)\" , \"New York University\":\"New York University (Courant Institute of Mathematical Sciences)\" , \"University of California, Berkeley\":\"University of California, Berkeley (Haas School of Business)\", \n",
        "         \"University of California, Los Angeles\":\"University of California, Los Angeles (Anderson School of Management)\", \"Illinois Institute of Technology\": \"University of Illinois at Urbana-Champaign\", \"Princeton University\": \"Princeton University (Bendheim Center for Finance)\"}\n",
        "\n",
        "df_quant = df_quant.replace(reppy)\n",
        "\n",
        "df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Columbia University\") &  (df_quant[\"program\"]==\"Financial Engineering\")].index.values[0], 1] = \"Columbia University (Columbia Engineering)\"\n",
        "\n",
        "\n",
        "df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") &  (df_quant[\"program\"]==\"Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\"  \n",
        "df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") &  (df_quant[\"program\"]==\"Quantitative Finance\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
        "df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") &  (df_risk[\"Programme\"]==\"Master’s in Mathematical Finance\")].index.values[0], 0] = \"Rutgers University (Mathematical Finance)\"  \n",
        "df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") &  (df_risk[\"Programme\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 0] = \"Rutgers University (Quantitative Finance)\" \n",
        "df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") &  (df_risk[\"Programme\"]==\"MSc in Mathematical Finance\")].index.values[0], 0] = \"University of York (Mathematical Finance)\"  \n",
        "df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") &  (df_risk[\"Programme\"]==\"MSc in Financial Engineering\")].index.values[0], 0] = \"University of York (Quantitative Finance)\" \n",
        "\n",
        "df_risk['Accept. rate']  = ser_perce(df_risk['Accept. rate'] )\n",
        "df_risk['Empl. rate'] = ser_perce(df_risk['Empl. rate'] )\n",
        "\n",
        "df_risk['Salary ($)'] = df_risk['Salary ($)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
        "\n",
        "df_risk = df_risk.fillna(df_risk.median())\n",
        "df_risk_two[\"Accept. rate\"] = ser_perce(df_risk_two[\"Accept. rate\"] )\n",
        "df_risk_two[\"Students accepting\"] = ser_perce(df_risk_two[\"Students accepting\"] )\n",
        "df_risk_two[\"Employment rate\"] = ser_perce(df_risk_two[\"Employment rate\"] )\n",
        "df_risk_two[\"Industry-affiliated lecturers\"] = ser_perce(df_risk_two[\"Industry-affiliated lecturers\"] )\n",
        "df_risk_two[\"Salary (US$, adjusted)\"] = df_risk_two['Salary (US$, adjusted)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
        "df_risk_two = df_risk_two.fillna(df_risk_two.median())"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vTANblkqgPjq"
      },
      "source": [
        "df_risk_two = df_risk_two.drop([\"Accept. rate\", \"Class size\", \"Salary (US$, adjusted)\"], axis=1)\n",
        "df_risk_merge = pd.merge(df_risk, df_risk_two, on=\"University/institution\",how=\"left\")\n",
        "df_risk_merge = pd.merge(df_risk_merge, df_det[[\"Average Citations\", \"Staff to Students\"]],left_on=\"University/institution\", right_index=True,how=\"left\")\n",
        "df_risk_merge = df_risk_merge.fillna(df_risk_merge.median())"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ULZO466AeXwr"
      },
      "source": [
        "full_fang = pd.merge(df_risk_merge, df_quant, left_on=\"University/institution\", right_on=\"uni_name\",how=\"left\")\n",
        "full_fang = full_fang.fillna(full_fang.median())\n",
        "full_fang[\"salary_descrepancy\"] = (full_fang[\"avr_start_salary_bonus\"] - full_fang[\"Salary ($)\"]).abs()\n",
        "full_fang.loc[full_fang['Country'] != 'US', 'salary_descrepancy'] = np.nan\n",
        "full_fang = full_fang.fillna(full_fang.median())\n",
        "full_fang[\"class_descrepancy\"] = (full_fang[\"Class size\"] - full_fang[\"cohort_size\"]).abs()\n",
        "full_fang.loc[full_fang['Country'] != 'US', 'class_descrepancy'] = np.nan\n",
        "full_fang[\"employment_descrepancy\"] = (full_fang[\"Empl. rate\"] - full_fang[\"emplm_rate_3_month_aft\"]).abs()\n",
        "full_fang.loc[full_fang['Country'] != 'US', 'employment_descrepancy'] = np.nan\n",
        "full_fang = full_fang.fillna(full_fang.median())\n",
        "full_fang = full_fang.set_index(\"University/institution\")\n",
        "full_fang = full_fang[full_fang.select_dtypes(include=np.number).columns.tolist()]\n",
        "bool_df = full_fang.copy()\n",
        "for col in bool_df.columns:\n",
        "  bool_df[col] = bool_df[col].duplicated()\n",
        "  \n",
        "bool_df = bool_df.replace({False:0, True:1})\n",
        "full_fang[\"missing_data\"] = bool_df.sum(axis=1)\n",
        "\n",
        "full_fang = pd.merge(full_fang,df_rank, left_index=True, right_index=True)\n",
        "# full_fang = full_fang.drop('University of York') # replicated fix later\n",
        "normalized_df=(full_fang-full_fang.min())/(full_fang.max()-full_fang.min())"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yq7WL6hvr3bJ"
      },
      "source": [
        "reputation = normalized_df[\"peer_assessment\"]*.20 + normalized_df[\"avr_gre_quant\"]*.20 + (1-(normalized_df[\"salary_descrepancy\"]+normalized_df[\"class_descrepancy\"]+normalized_df[\"employment_descrepancy\"]+normalized_df[\"missing_data\"])/4)*.20 + (1-normalized_df[\"Ranking\"])*.20 + normalized_df[\"Students accepting\"]*0.05 + (1-normalized_df[\"Accept. rate\"])*0.15\n",
        "remuneration = (normalized_df[\"Salary ($)\"] + normalized_df[\"avr_start_salary_bonus\"])/2*.50 + (normalized_df[\"Empl. rate\"] + normalized_df[\"emplm_rate_3_month_aft\"] + normalized_df[\"emplm_rate_grad\"])/3*.25 +(1-normalized_df[\"tuition\"])*.25\n",
        "quality = normalized_df[\"Industry-affiliated lecturers\"]*.40 + (1- (normalized_df[\"Class size\"]))*.20 + normalized_df[\"Staff to Students\"]*.20 +  normalized_df[\"Average Citations\"]*.20\n",
        "\n",
        "df_new = reputation.to_frame()\n",
        "df_new.columns = [\"Reputation\"]\n",
        "df_new[\"Remuneration\"] = remuneration.values\n",
        "df_new[\"Quality\"] = quality.values\n",
        "df_new_norm =(df_new-df_new.min())/(df_new.max()-df_new.min())\n",
        "df_new_norm[\"Final\"] = df_new_norm.sum(axis=1)/3\n",
        "\n",
        "df_all =  pd.merge(pd.merge(df_new_norm[[\"Final\",\"Reputation\",\"Remuneration\",\"Quality\"]],full_fang, left_index=True,right_index=True, how=\"left\"), df_det.drop([\"Employment rate\", \"Average Citations\",  \"Staff to Students\"],axis=1), left_index=True, right_index=True, how=\"left\")\n",
        "df_all = df_all.round(2)\n",
        "df_all.sort_values(\"Final\",ascending=False).reset_index().to_csv(\"quant_rating.csv\", index=False)"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Quant Rating Calculation.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"mount_file_id": "1L8FdqEzLSa9wqNfq4qVQKSDV9HvaCc6_",
	"authorship_tag": "ABX9TyMxusMOXSGLfDWdYsazgnLE",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/firmai/f16261734e1c2e4cbacc08ecf453de63/quant-rating-calculation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "yuR7tYBvoxTp"
	},
	"source": [
	"## Quant Rating Calculation\n",
	"\n",
	"`Overall = Reputation Score 33% + Remuneration Score 33% + Quality Score 33% `"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "askumoNVJ0bl"
	},
	"source": [
	"import pandas as pd\n",
	"\n",
	"pre_l = \"https://github.com/firmai/random-assets-two/blob/master/quantrating/\"\n",
	"\n",
	"\n",
	"df_tfe = pd.read_csv(pre_l +\"data_tfetimes.csv?raw=true\")\n",
	"df_quant = pd.read_csv(pre_l +\"data_quantnet.csv?raw=true\")\n",
	"df_risk = pd.read_csv(pre_l +\"data_risk.csv?raw=true\")\n",
	"df_det = pd.read_excel(pre_l + \"risk_quantitative_finance.xlsx?raw=true\")\n",
	"df_risk_two = pd.read_csv(pre_l +\"data_risk2.csv?raw=true\")\n",
	"df_rank = pd.read_csv(pre_l +\"uni_rank.csv?raw=true\").set_index(\"University/institution\")\n",
	"\n",
	"\n",
	"df_det[\"Banking\"] = df_det[\"Graduates employed in\"].str.split(\"Banking: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det[\"Asset Management\"] = df_det[\"Graduates employed in\"].str.split(\"asset management: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det[\"Consulting\"] = df_det[\"Graduates employed in\"].str.split(\"consulting: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det[\"Fintech\"] = df_det[\"Graduates employed in\"].str.split(\"fintech: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det[\"Academia\"] = df_det[\"Graduates employed in\"].str.split(\"academia: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det[\"Other\"] = df_det[\"Graduates employed in\"].str.split(\"other: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
	"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n",
	"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
	"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Mathematical Finance\")].index.values[0], 1] = \"University of York (Mathematical Finance)\" \n",
	"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Financial Engineering\")].index.values[0], 1] = \"University of York (Quantitative Finance)\" \n",
	"df_det = df_det.set_index(\"Name\")\n",
	"df_det[\"Most-cited lecturers\"] = df_det[\"Most-cited lecturers\"].replace(',','', regex=True)\n",
	"df_det[\"Average Citations\"] = df_det[\"Most-cited lecturers\"].str.extractall('(\\d+)').unstack().loc[:,0].astype(float).mean(axis=1,skipna=True)\n",
	"# both have 96 in the fall\n",
	"df_det.loc[\"University of California, Berkeley (Haas School of Business)\",\"Full-time students\"] = 96\n",
	"df_det.loc[\"New York University (Tandon School of Engineering)\",\"Full-time students\"] = 96\n",
	"df_det[\"Staff to Students\"] = df_det[\"Teaching staff\"] / df_det[\"Full-time students\"]"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "UF30mEvWN5Jd"
	},
	"source": [
	"import numpy as np\n",
	"df_quant = df_quant.replace({\"-\":np.nan})\n",
	"def ser_perce(ser):\n",
	" ser = ser.str.rstrip('%')\n",
	" ser = ser.fillna(ser.median()).astype('float') / 100.0\n",
	" return ser\n",
	"\n",
	"df_quant['emplm_rate_grad'] = ser_perce(df_quant['emplm_rate_grad'] )\n",
	"df_quant['emplm_rate_3_month_aft'] = ser_perce(df_quant['emplm_rate_3_month_aft'] )\n",
	"df_quant['avr_start_salary_bonus'] = df_quant['avr_start_salary_bonus'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
	"df_quant['tuition'] = df_quant['tuition'].str.split().str[0]\n",
	"df_quant['tuition'] = df_quant['tuition'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
	"\n",
	"df_quant['cohort_size'] = df_quant['cohort_size'].str.split().str[0].astype(int)\n",
	"\n",
	"df_quant = df_quant.fillna(df_quant.median())\n",
	"voila = df_quant[[\"peer_assessment\",\"emplm_rate_grad\",\"emplm_rate_3_month_aft\",\"avr_start_salary_bonus\",\"avr_gre_quant\",\"tuition\",\"cohort_size\"]]\n",
	"voila = (voila-voila.min())/(voila.max()-voila.min())\n",
	"voila = voila * (1 - 0) + 0\n",
	"\n",
	"vals = voila[\"emplm_rate_grad\"] * 0.10 + voila[\"emplm_rate_3_month_aft\"] * 0.05 + voila[\"avr_start_salary_bonus\"] * 0.25 + voila[\"avr_start_salary_bonus\"] * 0.15"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "rNNXkfEZXKsZ"
	},
	"source": [
	"reppy = {\"Boston University\":\"Boston University (Questrom School of Business)\", \"NYU Tandon School of Engineering\":\"New York University (Tandon School of Engineering)\" , \"New York University\":\"New York University (Courant Institute of Mathematical Sciences)\" , \"University of California, Berkeley\":\"University of California, Berkeley (Haas School of Business)\", \n",
	" \"University of California, Los Angeles\":\"University of California, Los Angeles (Anderson School of Management)\", \"Illinois Institute of Technology\": \"University of Illinois at Urbana-Champaign\", \"Princeton University\": \"Princeton University (Bendheim Center for Finance)\"}\n",
	"\n",
	"df_quant = df_quant.replace(reppy)\n",
	"\n",
	"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Columbia University\") & (df_quant[\"program\"]==\"Financial Engineering\")].index.values[0], 1] = \"Columbia University (Columbia Engineering)\"\n",
	"\n",
	"\n",
	"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n",
	"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Quantitative Finance\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
	"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Mathematical Finance\")].index.values[0], 0] = \"Rutgers University (Mathematical Finance)\" \n",
	"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 0] = \"Rutgers University (Quantitative Finance)\" \n",
	"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Mathematical Finance\")].index.values[0], 0] = \"University of York (Mathematical Finance)\" \n",
	"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Financial Engineering\")].index.values[0], 0] = \"University of York (Quantitative Finance)\" \n",
	"\n",
	"df_risk['Accept. rate'] = ser_perce(df_risk['Accept. rate'] )\n",
	"df_risk['Empl. rate'] = ser_perce(df_risk['Empl. rate'] )\n",
	"\n",
	"df_risk['Salary ($)'] = df_risk['Salary ($)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
	"\n",
	"df_risk = df_risk.fillna(df_risk.median())\n",
	"df_risk_two[\"Accept. rate\"] = ser_perce(df_risk_two[\"Accept. rate\"] )\n",
	"df_risk_two[\"Students accepting\"] = ser_perce(df_risk_two[\"Students accepting\"] )\n",
	"df_risk_two[\"Employment rate\"] = ser_perce(df_risk_two[\"Employment rate\"] )\n",
	"df_risk_two[\"Industry-affiliated lecturers\"] = ser_perce(df_risk_two[\"Industry-affiliated lecturers\"] )\n",
	"df_risk_two[\"Salary (US$, adjusted)\"] = df_risk_two['Salary (US$, adjusted)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
	"df_risk_two = df_risk_two.fillna(df_risk_two.median())"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "vTANblkqgPjq"
	},
	"source": [
	"df_risk_two = df_risk_two.drop([\"Accept. rate\", \"Class size\", \"Salary (US$, adjusted)\"], axis=1)\n",
	"df_risk_merge = pd.merge(df_risk, df_risk_two, on=\"University/institution\",how=\"left\")\n",
	"df_risk_merge = pd.merge(df_risk_merge, df_det[[\"Average Citations\", \"Staff to Students\"]],left_on=\"University/institution\", right_index=True,how=\"left\")\n",
	"df_risk_merge = df_risk_merge.fillna(df_risk_merge.median())"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "ULZO466AeXwr"
	},
	"source": [
	"full_fang = pd.merge(df_risk_merge, df_quant, left_on=\"University/institution\", right_on=\"uni_name\",how=\"left\")\n",
	"full_fang = full_fang.fillna(full_fang.median())\n",
	"full_fang[\"salary_descrepancy\"] = (full_fang[\"avr_start_salary_bonus\"] - full_fang[\"Salary ($)\"]).abs()\n",
	"full_fang.loc[full_fang['Country'] != 'US', 'salary_descrepancy'] = np.nan\n",
	"full_fang = full_fang.fillna(full_fang.median())\n",
	"full_fang[\"class_descrepancy\"] = (full_fang[\"Class size\"] - full_fang[\"cohort_size\"]).abs()\n",
	"full_fang.loc[full_fang['Country'] != 'US', 'class_descrepancy'] = np.nan\n",
	"full_fang[\"employment_descrepancy\"] = (full_fang[\"Empl. rate\"] - full_fang[\"emplm_rate_3_month_aft\"]).abs()\n",
	"full_fang.loc[full_fang['Country'] != 'US', 'employment_descrepancy'] = np.nan\n",
	"full_fang = full_fang.fillna(full_fang.median())\n",
	"full_fang = full_fang.set_index(\"University/institution\")\n",
	"full_fang = full_fang[full_fang.select_dtypes(include=np.number).columns.tolist()]\n",
	"bool_df = full_fang.copy()\n",
	"for col in bool_df.columns:\n",
	" bool_df[col] = bool_df[col].duplicated()\n",
	" \n",
	"bool_df = bool_df.replace({False:0, True:1})\n",
	"full_fang[\"missing_data\"] = bool_df.sum(axis=1)\n",
	"\n",
	"full_fang = pd.merge(full_fang,df_rank, left_index=True, right_index=True)\n",
	"# full_fang = full_fang.drop('University of York') # replicated fix later\n",
	"normalized_df=(full_fang-full_fang.min())/(full_fang.max()-full_fang.min())"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "yq7WL6hvr3bJ"
	},
	"source": [
	"reputation = normalized_df[\"peer_assessment\"].20 + normalized_df[\"avr_gre_quant\"].20 + (1-(normalized_df[\"salary_descrepancy\"]+normalized_df[\"class_descrepancy\"]+normalized_df[\"employment_descrepancy\"]+normalized_df[\"missing_data\"])/4).20 + (1-normalized_df[\"Ranking\"]).20 + normalized_df[\"Students accepting\"]0.05 + (1-normalized_df[\"Accept. rate\"])0.15\n",
	"remuneration = (normalized_df[\"Salary ($)\"] + normalized_df[\"avr_start_salary_bonus\"])/2.50 + (normalized_df[\"Empl. rate\"] + normalized_df[\"emplm_rate_3_month_aft\"] + normalized_df[\"emplm_rate_grad\"])/3.25 +(1-normalized_df[\"tuition\"])*.25\n",
	"quality = normalized_df[\"Industry-affiliated lecturers\"].40 + (1- (normalized_df[\"Class size\"])).20 + normalized_df[\"Staff to Students\"].20 + normalized_df[\"Average Citations\"].20\n",
	"\n",
	"df_new = reputation.to_frame()\n",
	"df_new.columns = [\"Reputation\"]\n",
	"df_new[\"Remuneration\"] = remuneration.values\n",
	"df_new[\"Quality\"] = quality.values\n",
	"df_new_norm =(df_new-df_new.min())/(df_new.max()-df_new.min())\n",
	"df_new_norm[\"Final\"] = df_new_norm.sum(axis=1)/3\n",
	"\n",
	"df_all = pd.merge(pd.merge(df_new_norm[[\"Final\",\"Reputation\",\"Remuneration\",\"Quality\"]],full_fang, left_index=True,right_index=True, how=\"left\"), df_det.drop([\"Employment rate\", \"Average Citations\", \"Staff to Students\"],axis=1), left_index=True, right_index=True, how=\"left\")\n",
	"df_all = df_all.round(2)\n",
	"df_all.sort_values(\"Final\",ascending=False).reset_index().to_csv(\"quant_rating.csv\", index=False)"
	],
	"execution_count": null,
	"outputs": []
	}
	]
	}