Created
October 27, 2021 12:09
-
-
Save firmai/f16261734e1c2e4cbacc08ecf453de63 to your computer and use it in GitHub Desktop.
Quant Rating Calculation.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Quant Rating Calculation.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"mount_file_id": "1L8FdqEzLSa9wqNfq4qVQKSDV9HvaCc6_", | |
"authorship_tag": "ABX9TyMxusMOXSGLfDWdYsazgnLE", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/firmai/f16261734e1c2e4cbacc08ecf453de63/quant-rating-calculation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yuR7tYBvoxTp" | |
}, | |
"source": [ | |
"## Quant Rating Calculation\n", | |
"\n", | |
"`Overall = Reputation Score 33% + Remuneration Score 33% + Quality Score 33% `" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "askumoNVJ0bl" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"pre_l = \"https://github.com/firmai/random-assets-two/blob/master/quantrating/\"\n", | |
"\n", | |
"\n", | |
"df_tfe = pd.read_csv(pre_l +\"data_tfetimes.csv?raw=true\")\n", | |
"df_quant = pd.read_csv(pre_l +\"data_quantnet.csv?raw=true\")\n", | |
"df_risk = pd.read_csv(pre_l +\"data_risk.csv?raw=true\")\n", | |
"df_det = pd.read_excel(pre_l + \"risk_quantitative_finance.xlsx?raw=true\")\n", | |
"df_risk_two = pd.read_csv(pre_l +\"data_risk2.csv?raw=true\")\n", | |
"df_rank = pd.read_csv(pre_l +\"uni_rank.csv?raw=true\").set_index(\"University/institution\")\n", | |
"\n", | |
"\n", | |
"df_det[\"Banking\"] = df_det[\"Graduates employed in\"].str.split(\"Banking: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det[\"Asset Management\"] = df_det[\"Graduates employed in\"].str.split(\"asset management: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det[\"Consulting\"] = df_det[\"Graduates employed in\"].str.split(\"consulting: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det[\"Fintech\"] = df_det[\"Graduates employed in\"].str.split(\"fintech: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det[\"Academia\"] = df_det[\"Graduates employed in\"].str.split(\"academia: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det[\"Other\"] = df_det[\"Graduates employed in\"].str.split(\"other: \", expand=True)[1].str.split(\"%\", expand=True)[0].astype(float)/100\n", | |
"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n", | |
"df_det.iloc[df_det[(df_det[\"Name\"]==\"Rutgers University\") & (df_det[\"sheetnames\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n", | |
"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Mathematical Finance\")].index.values[0], 1] = \"University of York (Mathematical Finance)\" \n", | |
"df_det.iloc[df_det[(df_det[\"Name\"]==\"University of York\") & (df_det[\"sheetnames\"]==\"MSc in Financial Engineering\")].index.values[0], 1] = \"University of York (Quantitative Finance)\" \n", | |
"df_det = df_det.set_index(\"Name\")\n", | |
"df_det[\"Most-cited lecturers\"] = df_det[\"Most-cited lecturers\"].replace(',','', regex=True)\n", | |
"df_det[\"Average Citations\"] = df_det[\"Most-cited lecturers\"].str.extractall('(\\d+)').unstack().loc[:,0].astype(float).mean(axis=1,skipna=True)\n", | |
"# both have 96 in the fall\n", | |
"df_det.loc[\"University of California, Berkeley (Haas School of Business)\",\"Full-time students\"] = 96\n", | |
"df_det.loc[\"New York University (Tandon School of Engineering)\",\"Full-time students\"] = 96\n", | |
"df_det[\"Staff to Students\"] = df_det[\"Teaching staff\"] / df_det[\"Full-time students\"]" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UF30mEvWN5Jd" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"df_quant = df_quant.replace({\"-\":np.nan})\n", | |
"def ser_perce(ser):\n", | |
" ser = ser.str.rstrip('%')\n", | |
" ser = ser.fillna(ser.median()).astype('float') / 100.0\n", | |
" return ser\n", | |
"\n", | |
"df_quant['emplm_rate_grad'] = ser_perce(df_quant['emplm_rate_grad'] )\n", | |
"df_quant['emplm_rate_3_month_aft'] = ser_perce(df_quant['emplm_rate_3_month_aft'] )\n", | |
"df_quant['avr_start_salary_bonus'] = df_quant['avr_start_salary_bonus'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n", | |
"df_quant['tuition'] = df_quant['tuition'].str.split().str[0]\n", | |
"df_quant['tuition'] = df_quant['tuition'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n", | |
"\n", | |
"df_quant['cohort_size'] = df_quant['cohort_size'].str.split().str[0].astype(int)\n", | |
"\n", | |
"df_quant = df_quant.fillna(df_quant.median())\n", | |
"voila = df_quant[[\"peer_assessment\",\"emplm_rate_grad\",\"emplm_rate_3_month_aft\",\"avr_start_salary_bonus\",\"avr_gre_quant\",\"tuition\",\"cohort_size\"]]\n", | |
"voila = (voila-voila.min())/(voila.max()-voila.min())\n", | |
"voila = voila * (1 - 0) + 0\n", | |
"\n", | |
"vals = voila[\"emplm_rate_grad\"] * 0.10 + voila[\"emplm_rate_3_month_aft\"] * 0.05 + voila[\"avr_start_salary_bonus\"] * 0.25 + voila[\"avr_start_salary_bonus\"] * 0.15" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rNNXkfEZXKsZ" | |
}, | |
"source": [ | |
"reppy = {\"Boston University\":\"Boston University (Questrom School of Business)\", \"NYU Tandon School of Engineering\":\"New York University (Tandon School of Engineering)\" , \"New York University\":\"New York University (Courant Institute of Mathematical Sciences)\" , \"University of California, Berkeley\":\"University of California, Berkeley (Haas School of Business)\", \n", | |
" \"University of California, Los Angeles\":\"University of California, Los Angeles (Anderson School of Management)\", \"Illinois Institute of Technology\": \"University of Illinois at Urbana-Champaign\", \"Princeton University\": \"Princeton University (Bendheim Center for Finance)\"}\n", | |
"\n", | |
"df_quant = df_quant.replace(reppy)\n", | |
"\n", | |
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Columbia University\") & (df_quant[\"program\"]==\"Financial Engineering\")].index.values[0], 1] = \"Columbia University (Columbia Engineering)\"\n", | |
"\n", | |
"\n", | |
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Mathematical Finance\")].index.values[0], 1] = \"Rutgers University (Mathematical Finance)\" \n", | |
"df_quant.iloc[df_quant[(df_quant[\"uni_name\"]==\"Rutgers University\") & (df_quant[\"program\"]==\"Quantitative Finance\")].index.values[0], 1] = \"Rutgers University (Quantitative Finance)\" \n", | |
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Mathematical Finance\")].index.values[0], 0] = \"Rutgers University (Mathematical Finance)\" \n", | |
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"Rutgers University\") & (df_risk[\"Programme\"]==\"Master’s in Financial Statistics and Risk Management\")].index.values[0], 0] = \"Rutgers University (Quantitative Finance)\" \n", | |
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Mathematical Finance\")].index.values[0], 0] = \"University of York (Mathematical Finance)\" \n", | |
"df_risk.iloc[df_risk[(df_risk[\"University/institution\"]==\"University of York\") & (df_risk[\"Programme\"]==\"MSc in Financial Engineering\")].index.values[0], 0] = \"University of York (Quantitative Finance)\" \n", | |
"\n", | |
"df_risk['Accept. rate'] = ser_perce(df_risk['Accept. rate'] )\n", | |
"df_risk['Empl. rate'] = ser_perce(df_risk['Empl. rate'] )\n", | |
"\n", | |
"df_risk['Salary ($)'] = df_risk['Salary ($)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n", | |
"\n", | |
"df_risk = df_risk.fillna(df_risk.median())\n", | |
"df_risk_two[\"Accept. rate\"] = ser_perce(df_risk_two[\"Accept. rate\"] )\n", | |
"df_risk_two[\"Students accepting\"] = ser_perce(df_risk_two[\"Students accepting\"] )\n", | |
"df_risk_two[\"Employment rate\"] = ser_perce(df_risk_two[\"Employment rate\"] )\n", | |
"df_risk_two[\"Industry-affiliated lecturers\"] = ser_perce(df_risk_two[\"Industry-affiliated lecturers\"] )\n", | |
"df_risk_two[\"Salary (US$, adjusted)\"] = df_risk_two['Salary (US$, adjusted)'].replace({'\\$': '', ',': ''}, regex=True).astype(float)\n", | |
"df_risk_two = df_risk_two.fillna(df_risk_two.median())" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vTANblkqgPjq" | |
}, | |
"source": [ | |
"df_risk_two = df_risk_two.drop([\"Accept. rate\", \"Class size\", \"Salary (US$, adjusted)\"], axis=1)\n", | |
"df_risk_merge = pd.merge(df_risk, df_risk_two, on=\"University/institution\",how=\"left\")\n", | |
"df_risk_merge = pd.merge(df_risk_merge, df_det[[\"Average Citations\", \"Staff to Students\"]],left_on=\"University/institution\", right_index=True,how=\"left\")\n", | |
"df_risk_merge = df_risk_merge.fillna(df_risk_merge.median())" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ULZO466AeXwr" | |
}, | |
"source": [ | |
"full_fang = pd.merge(df_risk_merge, df_quant, left_on=\"University/institution\", right_on=\"uni_name\",how=\"left\")\n", | |
"full_fang = full_fang.fillna(full_fang.median())\n", | |
"full_fang[\"salary_descrepancy\"] = (full_fang[\"avr_start_salary_bonus\"] - full_fang[\"Salary ($)\"]).abs()\n", | |
"full_fang.loc[full_fang['Country'] != 'US', 'salary_descrepancy'] = np.nan\n", | |
"full_fang = full_fang.fillna(full_fang.median())\n", | |
"full_fang[\"class_descrepancy\"] = (full_fang[\"Class size\"] - full_fang[\"cohort_size\"]).abs()\n", | |
"full_fang.loc[full_fang['Country'] != 'US', 'class_descrepancy'] = np.nan\n", | |
"full_fang[\"employment_descrepancy\"] = (full_fang[\"Empl. rate\"] - full_fang[\"emplm_rate_3_month_aft\"]).abs()\n", | |
"full_fang.loc[full_fang['Country'] != 'US', 'employment_descrepancy'] = np.nan\n", | |
"full_fang = full_fang.fillna(full_fang.median())\n", | |
"full_fang = full_fang.set_index(\"University/institution\")\n", | |
"full_fang = full_fang[full_fang.select_dtypes(include=np.number).columns.tolist()]\n", | |
"bool_df = full_fang.copy()\n", | |
"for col in bool_df.columns:\n", | |
" bool_df[col] = bool_df[col].duplicated()\n", | |
" \n", | |
"bool_df = bool_df.replace({False:0, True:1})\n", | |
"full_fang[\"missing_data\"] = bool_df.sum(axis=1)\n", | |
"\n", | |
"full_fang = pd.merge(full_fang,df_rank, left_index=True, right_index=True)\n", | |
"# full_fang = full_fang.drop('University of York') # replicated fix later\n", | |
"normalized_df=(full_fang-full_fang.min())/(full_fang.max()-full_fang.min())" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yq7WL6hvr3bJ" | |
}, | |
"source": [ | |
"reputation = normalized_df[\"peer_assessment\"]*.20 + normalized_df[\"avr_gre_quant\"]*.20 + (1-(normalized_df[\"salary_descrepancy\"]+normalized_df[\"class_descrepancy\"]+normalized_df[\"employment_descrepancy\"]+normalized_df[\"missing_data\"])/4)*.20 + (1-normalized_df[\"Ranking\"])*.20 + normalized_df[\"Students accepting\"]*0.05 + (1-normalized_df[\"Accept. rate\"])*0.15\n", | |
"remuneration = (normalized_df[\"Salary ($)\"] + normalized_df[\"avr_start_salary_bonus\"])/2*.50 + (normalized_df[\"Empl. rate\"] + normalized_df[\"emplm_rate_3_month_aft\"] + normalized_df[\"emplm_rate_grad\"])/3*.25 +(1-normalized_df[\"tuition\"])*.25\n", | |
"quality = normalized_df[\"Industry-affiliated lecturers\"]*.40 + (1- (normalized_df[\"Class size\"]))*.20 + normalized_df[\"Staff to Students\"]*.20 + normalized_df[\"Average Citations\"]*.20\n", | |
"\n", | |
"df_new = reputation.to_frame()\n", | |
"df_new.columns = [\"Reputation\"]\n", | |
"df_new[\"Remuneration\"] = remuneration.values\n", | |
"df_new[\"Quality\"] = quality.values\n", | |
"df_new_norm =(df_new-df_new.min())/(df_new.max()-df_new.min())\n", | |
"df_new_norm[\"Final\"] = df_new_norm.sum(axis=1)/3\n", | |
"\n", | |
"df_all = pd.merge(pd.merge(df_new_norm[[\"Final\",\"Reputation\",\"Remuneration\",\"Quality\"]],full_fang, left_index=True,right_index=True, how=\"left\"), df_det.drop([\"Employment rate\", \"Average Citations\", \"Staff to Students\"],axis=1), left_index=True, right_index=True, how=\"left\")\n", | |
"df_all = df_all.round(2)\n", | |
"df_all.sort_values(\"Final\",ascending=False).reset_index().to_csv(\"quant_rating.csv\", index=False)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment