Skip to content

Instantly share code, notes, and snippets.

@firmai
Created October 27, 2021 12:09
Show Gist options
  • Save firmai/f16261734e1c2e4cbacc08ecf453de63 to your computer and use it in GitHub Desktop.
Save firmai/f16261734e1c2e4cbacc08ecf453de63 to your computer and use it in GitHub Desktop.
Quant Rating Calculation.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Quant Rating Calculation.ipynb",
"provenance": [],
"collapsed_sections": [],
"mount_file_id": "1L8FdqEzLSa9wqNfq4qVQKSDV9HvaCc6_",
"authorship_tag": "ABX9TyMxusMOXSGLfDWdYsazgnLE",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/firmai/f16261734e1c2e4cbacc08ecf453de63/quant-rating-calculation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yuR7tYBvoxTp"
},
"source": [
"## Quant Rating Calculation\n",
"\n",
"`Overall = Reputation Score 33% + Remuneration Score 33% + Quality Score 33% `"
]
},
{
"cell_type": "code",
"metadata": {
"id": "askumoNVJ0bl"
},
"source": [
"import pandas as pd\n",
"\n",
"pre_l = \"https://github.com/firmai/random-assets-two/blob/master/quantrating/\"\n",
"\n",
"\n",
"df_tfe = pd.read_csv(pre_l +\"data_tfetimes.csv?raw=true\")\n",
"df_quant = pd.read_csv(pre_l +\"data_quantnet.csv?raw=true\")\n",
"df_risk = pd.read_csv(pre_l +\"data_risk.csv?raw=true\")\n",
"df_det = pd.read_excel(pre_l + \"risk_quantitative_finance.xlsx?raw=true\")\n",
"df_risk_two = pd.read_csv(pre_l +\"data_risk2.csv?raw=true\")\n",
"df_rank = pd.read_csv(pre_l +\"uni_rank.csv?raw=true\").set_index(\"University/institution\")\n",
"\n",
"\n",
"df_det[\"Banking\"] = df_det[\"Graduates employed in\"].str.split(\"Banking: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det[\"Asset Management\"] = df_det[\"Graduates employed in\"].str.split(\"asset management: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det[\"Consulting\"] = df_det[\"Graduates employed in\"].str.split(\"consulting: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det[\"Fintech\"] = df_det[\"Graduates employed in\"].str.split(\"fintech: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det[\"Academia\"] = df_det[\"Graduates employed in\"].str.split(\"academia: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det[\"Other\"] = df_det[\"Graduates employed in\"].str.split(\"other: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n",
"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n",
"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Mathematical Finance\")].index.values[0], 1] = \"University of York (Mathematical Finance)\" \n",
"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Financial Engineering\")].index.values[0], 1] = \"University of York (Quantitative Finance)\" \n",
"df_det = df_det.set_index(\"Name\")\n",
"df_det[\"Most-cited lecturers\"] = df_det[\"Most-cited lecturers\"].replace(',','', regex=True)\n",
"df_det[\"Average Citations\"] = df_det[\"Most-cited lecturers\"].str.extractall('(\\d+)').unstack().loc[:,0].astype(float).mean(axis=1,skipna=True)\n",
"# both have 96 in the fall\n",
"df_det.loc[\"University of California, Berkeley (Haas School of Business)\",\"Full-time students\"] = 96\n",
"df_det.loc[\"New York University (Tandon School of Engineering)\",\"Full-time students\"] = 96\n",
"df_det[\"Staff to Students\"] = df_det[\"Teaching staff\"] / df_det[\"Full-time students\"]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UF30mEvWN5Jd"
},
"source": [
"import numpy as np\n",
"df_quant = df_quant.replace({\"-\":np.nan})\n",
"def ser_perce(ser):\n",
" ser = ser.str.rstrip('%')\n",
" ser = ser.fillna(ser.median()).astype('float') / 100.0\n",
" return ser\n",
"\n",
"df_quant['emplm_rate_grad'] = ser_perce(df_quant['emplm_rate_grad'] )\n",
"df_quant['emplm_rate_3_month_aft'] = ser_perce(df_quant['emplm_rate_3_month_aft'] )\n",
"df_quant['avr_start_salary_bonus'] = df_quant['avr_start_salary_bonus'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
"df_quant['tuition'] = df_quant['tuition'].str.split().str[0]\n",
"df_quant['tuition'] = df_quant['tuition'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
"\n",
"df_quant['cohort_size'] = df_quant['cohort_size'].str.split().str[0].astype(int)\n",
"\n",
"df_quant = df_quant.fillna(df_quant.median())\n",
"voila = df_quant[[\"peer_assessment\",\"emplm_rate_grad\",\"emplm_rate_3_month_aft\",\"avr_start_salary_bonus\",\"avr_gre_quant\",\"tuition\",\"cohort_size\"]]\n",
"voila = (voila-voila.min())/(voila.max()-voila.min())\n",
"voila = voila * (1 - 0) + 0\n",
"\n",
"vals = voila[\"emplm_rate_grad\"] * 0.10 + voila[\"emplm_rate_3_month_aft\"] * 0.05 + voila[\"avr_start_salary_bonus\"] * 0.25 + voila[\"avr_start_salary_bonus\"] * 0.15"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rNNXkfEZXKsZ"
},
"source": [
"reppy = {\"Boston University\":\"Boston University (Questrom School of Business)\", \"NYU Tandon School of Engineering\":\"New York University (Tandon School of Engineering)\" , \"New York University\":\"New York University (Courant Institute of Mathematical Sciences)\" , \"University of California, Berkeley\":\"University of California, Berkeley (Haas School of Business)\", \n",
" \"University of California, Los Angeles\":\"University of California, Los Angeles (Anderson School of Management)\", \"Illinois Institute of Technology\": \"University of Illinois at Urbana-Champaign\", \"Princeton University\": \"Princeton University (Bendheim Center for Finance)\"}\n",
"\n",
"df_quant = df_quant.replace(reppy)\n",
"\n",
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Columbia University\") & (df_quant[\"program\"]==\"Financial Engineering\")].index.values[0], 1] = \"Columbia University (Columbia Engineering)\"\n",
"\n",
"\n",
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n",
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Quantitative Finance\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n",
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Mathematical Finance\")].index.values[0], 0] = \"Rutgers University (Mathematical Finance)\" \n",
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 0] = \"Rutgers University (Quantitative Finance)\" \n",
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Mathematical Finance\")].index.values[0], 0] = \"University of York (Mathematical Finance)\" \n",
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Financial Engineering\")].index.values[0], 0] = \"University of York (Quantitative Finance)\" \n",
"\n",
"df_risk['Accept. rate'] = ser_perce(df_risk['Accept. rate'] )\n",
"df_risk['Empl. rate'] = ser_perce(df_risk['Empl. rate'] )\n",
"\n",
"df_risk['Salary ($)'] = df_risk['Salary ($)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
"\n",
"df_risk = df_risk.fillna(df_risk.median())\n",
"df_risk_two[\"Accept. rate\"] = ser_perce(df_risk_two[\"Accept. rate\"] )\n",
"df_risk_two[\"Students accepting\"] = ser_perce(df_risk_two[\"Students accepting\"] )\n",
"df_risk_two[\"Employment rate\"] = ser_perce(df_risk_two[\"Employment rate\"] )\n",
"df_risk_two[\"Industry-affiliated lecturers\"] = ser_perce(df_risk_two[\"Industry-affiliated lecturers\"] )\n",
"df_risk_two[\"Salary (US$, adjusted)\"] = df_risk_two['Salary (US$, adjusted)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n",
"df_risk_two = df_risk_two.fillna(df_risk_two.median())"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "vTANblkqgPjq"
},
"source": [
"df_risk_two = df_risk_two.drop([\"Accept. rate\", \"Class size\", \"Salary (US$, adjusted)\"], axis=1)\n",
"df_risk_merge = pd.merge(df_risk, df_risk_two, on=\"University/institution\",how=\"left\")\n",
"df_risk_merge = pd.merge(df_risk_merge, df_det[[\"Average Citations\", \"Staff to Students\"]],left_on=\"University/institution\", right_index=True,how=\"left\")\n",
"df_risk_merge = df_risk_merge.fillna(df_risk_merge.median())"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ULZO466AeXwr"
},
"source": [
"full_fang = pd.merge(df_risk_merge, df_quant, left_on=\"University/institution\", right_on=\"uni_name\",how=\"left\")\n",
"full_fang = full_fang.fillna(full_fang.median())\n",
"full_fang[\"salary_descrepancy\"] = (full_fang[\"avr_start_salary_bonus\"] - full_fang[\"Salary ($)\"]).abs()\n",
"full_fang.loc[full_fang['Country'] != 'US', 'salary_descrepancy'] = np.nan\n",
"full_fang = full_fang.fillna(full_fang.median())\n",
"full_fang[\"class_descrepancy\"] = (full_fang[\"Class size\"] - full_fang[\"cohort_size\"]).abs()\n",
"full_fang.loc[full_fang['Country'] != 'US', 'class_descrepancy'] = np.nan\n",
"full_fang[\"employment_descrepancy\"] = (full_fang[\"Empl. rate\"] - full_fang[\"emplm_rate_3_month_aft\"]).abs()\n",
"full_fang.loc[full_fang['Country'] != 'US', 'employment_descrepancy'] = np.nan\n",
"full_fang = full_fang.fillna(full_fang.median())\n",
"full_fang = full_fang.set_index(\"University/institution\")\n",
"full_fang = full_fang[full_fang.select_dtypes(include=np.number).columns.tolist()]\n",
"bool_df = full_fang.copy()\n",
"for col in bool_df.columns:\n",
" bool_df[col] = bool_df[col].duplicated()\n",
" \n",
"bool_df = bool_df.replace({False:0, True:1})\n",
"full_fang[\"missing_data\"] = bool_df.sum(axis=1)\n",
"\n",
"full_fang = pd.merge(full_fang,df_rank, left_index=True, right_index=True)\n",
"# full_fang = full_fang.drop('University of York') # replicated fix later\n",
"normalized_df=(full_fang-full_fang.min())/(full_fang.max()-full_fang.min())"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yq7WL6hvr3bJ"
},
"source": [
"reputation = normalized_df[\"peer_assessment\"]*.20 + normalized_df[\"avr_gre_quant\"]*.20 + (1-(normalized_df[\"salary_descrepancy\"]+normalized_df[\"class_descrepancy\"]+normalized_df[\"employment_descrepancy\"]+normalized_df[\"missing_data\"])/4)*.20 + (1-normalized_df[\"Ranking\"])*.20 + normalized_df[\"Students accepting\"]*0.05 + (1-normalized_df[\"Accept. rate\"])*0.15\n",
"remuneration = (normalized_df[\"Salary ($)\"] + normalized_df[\"avr_start_salary_bonus\"])/2*.50 + (normalized_df[\"Empl. rate\"] + normalized_df[\"emplm_rate_3_month_aft\"] + normalized_df[\"emplm_rate_grad\"])/3*.25 +(1-normalized_df[\"tuition\"])*.25\n",
"quality = normalized_df[\"Industry-affiliated lecturers\"]*.40 + (1- (normalized_df[\"Class size\"]))*.20 + normalized_df[\"Staff to Students\"]*.20 + normalized_df[\"Average Citations\"]*.20\n",
"\n",
"df_new = reputation.to_frame()\n",
"df_new.columns = [\"Reputation\"]\n",
"df_new[\"Remuneration\"] = remuneration.values\n",
"df_new[\"Quality\"] = quality.values\n",
"df_new_norm =(df_new-df_new.min())/(df_new.max()-df_new.min())\n",
"df_new_norm[\"Final\"] = df_new_norm.sum(axis=1)/3\n",
"\n",
"df_all = pd.merge(pd.merge(df_new_norm[[\"Final\",\"Reputation\",\"Remuneration\",\"Quality\"]],full_fang, left_index=True,right_index=True, how=\"left\"), df_det.drop([\"Employment rate\", \"Average Citations\", \"Staff to Students\"],axis=1), left_index=True, right_index=True, how=\"left\")\n",
"df_all = df_all.round(2)\n",
"df_all.sort_values(\"Final\",ascending=False).reset_index().to_csv(\"quant_rating.csv\", index=False)"
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment