Skip to content

Instantly share code, notes, and snippets.

@kychanbp
Created March 11, 2020 06:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kychanbp/5c0cdc375bf8637a8d61164860678987 to your computer and use it in GitHub Desktop.
Save kychanbp/5c0cdc375bf8637a8d61164860678987 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import bs4 as bs\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def checkEqual(lst):\n",
" \"check if a list contains same element\"\n",
" return lst[1:] == lst[:-1]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"url = \"http://www.aastocks.com/en/mpf/search.aspx?tab=1&sp=\"\n",
"url_details = \"http://www.aastocks.com/en/mpf/compare.aspx?comp1={}\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"page = requests.get(url)\n",
"soup = bs.BeautifulSoup(page.text, 'lxml')\n",
"parsed_table = soup.find_all('table', {\"class\":\"tblM s2 mpfDL\"})[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_html(str(parsed_table),encoding='utf-8', header=0)[0]\n",
"df = df.rename(columns={'Unnamed: 0':'provider'})\n",
"df['provider'] = df['provider'].ffill()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# remove rows with same element\n",
"equal_rows = []\n",
"for index, row in df.iterrows():\n",
" if checkEqual(row.to_list()):\n",
" equal_rows.append(index)\n",
"\n",
"df = df[~df.index.isin(equal_rows)]\n",
"df = df[:-1]\n",
"df = df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# extract the detail code for each fund\n",
"df['link'] = [np.where(tag.has_attr('href'),tag.get('href'),\"no link\") for tag in parsed_table.find_all('a')]\n",
"df['link'] = df['link'].astype(str)\n",
"df['code'] = df['link'].str.extract(r\"\\((\\d*)\\)\", expand=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# get the risk level\n",
"selector = '#compare_table > table > tr:nth-child(16) > td.center.cls'\n",
"risk = []\n",
"for index, row in df.iterrows():\n",
" details = requests.get(url_details.format(row['code']))\n",
" soup = bs.BeautifulSoup(details.text, \"html.parser\")\n",
" risk.append(soup.select_one(selector).text)\n",
"\n",
"df['risk'] = risk"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['provider', 'Constitutent Fund', 'Type', 'Price', '1-Y Chg', '6-M Chg',\n",
" '3-M Chg', '1-M Chg', 'YTD Chg',\n",
" 'FER FER?The Fund Expense Ratio (FER) outlines the fees and charges of the relevant fund in percentage for the previous financial period. Note that the FER is calculated based on data from the previous financial period. The published FER will not reflect any increases or decreases in fees, charges or expenses in the current financial period. It is not necessary for funds with less than 2 years of history to show a FER.',\n",
" 'Unnamed: 10', 'link', 'code', 'risk'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('results/table.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"df['1 year return'] = df['1-Y Chg'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"df['6 month return'] = df['6-M Chg'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"df['3 month return'] = df['3-M Chg'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"df['1 month return'] = df['1-M Chg'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"df['year to date return'] = df['YTD Chg'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"df['risk'] = df['risk'].str.extract(r\"([+,-]?\\d{1,3}.\\d{1,2})\\%\", expand=True).astype(float)\n",
"\n",
"df_selected = df[['provider','Constitutent Fund', 'Type', 'Price', '1 year return', '6 month return', '3 month return',\n",
" '1 month return', 'year to date return', 'risk']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"df_selected = df_selected[~df_selected['risk'].isnull()].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"df_selected['sharp ratio'] = (df_selected['1 year return']-2)/df_selected['risk']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>provider</th>\n",
" <th>Constitutent Fund</th>\n",
" <th>Type</th>\n",
" <th>Price</th>\n",
" <th>1 year return</th>\n",
" <th>6 month return</th>\n",
" <th>3 month return</th>\n",
" <th>1 month return</th>\n",
" <th>year to date return</th>\n",
" <th>risk</th>\n",
" <th>sharp ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>193</th>\n",
" <td>HSBC Mandatory Provident Fund - SuperTrust Plus</td>\n",
" <td>Age 65 Plus Fund</td>\n",
" <td>Mixed Assets</td>\n",
" <td>12.6702020/01/31</td>\n",
" <td>8.57</td>\n",
" <td>3.34</td>\n",
" <td>1.36</td>\n",
" <td>1.12</td>\n",
" <td>1.12</td>\n",
" <td>3.15</td>\n",
" <td>2.085714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>Hang Seng Mandatory Provident Fund - SuperTrus...</td>\n",
" <td>Age 65 Plus Fund</td>\n",
" <td>Mixed Assets</td>\n",
" <td>12.6702020/01/31</td>\n",
" <td>8.57</td>\n",
" <td>3.34</td>\n",
" <td>1.36</td>\n",
" <td>1.12</td>\n",
" <td>1.12</td>\n",
" <td>3.15</td>\n",
" <td>2.085714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>Manulife Global Select (MPF) Scheme</td>\n",
" <td>Manulife MPF European Equity Fund</td>\n",
" <td>Equity</td>\n",
" <td>11.9522020/01/31</td>\n",
" <td>21.17</td>\n",
" <td>8.74</td>\n",
" <td>4.83</td>\n",
" <td>-1.99</td>\n",
" <td>-1.99</td>\n",
" <td>11.84</td>\n",
" <td>1.619088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>359</th>\n",
" <td>SHKP MPF Employer Sponsored Scheme</td>\n",
" <td>Manulife Career Average Guaranteed Fund - SHKP</td>\n",
" <td>Guaranteed</td>\n",
" <td>13.3742020/01/31</td>\n",
" <td>6.27</td>\n",
" <td>1.90</td>\n",
" <td>0.80</td>\n",
" <td>1.38</td>\n",
" <td>1.38</td>\n",
" <td>2.77</td>\n",
" <td>1.541516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>267</th>\n",
" <td>MASS Mandatory Provident Fund Scheme</td>\n",
" <td>US Equity Fund</td>\n",
" <td>Equity</td>\n",
" <td>25.4332020/01/31</td>\n",
" <td>23.08</td>\n",
" <td>7.25</td>\n",
" <td>9.57</td>\n",
" <td>3.77</td>\n",
" <td>3.77</td>\n",
" <td>13.76</td>\n",
" <td>1.531977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>218</th>\n",
" <td>Sun Life MPF Comprehensive Scheme</td>\n",
" <td>Sun Life MPF Comprehensive Scheme MPF Conserva...</td>\n",
" <td>MPF Conservative</td>\n",
" <td>11.1602020/01/31</td>\n",
" <td>0.18</td>\n",
" <td>0.18</td>\n",
" <td>0.09</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.05</td>\n",
" <td>-36.400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>360</th>\n",
" <td>SHKP MPF Employer Sponsored Scheme</td>\n",
" <td>Invesco MPF Conservative Fund</td>\n",
" <td>MPF Conservative</td>\n",
" <td>11.3322020/01/31</td>\n",
" <td>1.39</td>\n",
" <td>0.80</td>\n",
" <td>0.40</td>\n",
" <td>0.15</td>\n",
" <td>0.15</td>\n",
" <td>0.00</td>\n",
" <td>-inf</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>AIA MPF - Prime Value Choice</td>\n",
" <td>MPF Conservative Fund</td>\n",
" <td>MPF Conservative</td>\n",
" <td>113.8702020/01/31</td>\n",
" <td>1.12</td>\n",
" <td>0.65</td>\n",
" <td>0.29</td>\n",
" <td>0.11</td>\n",
" <td>0.11</td>\n",
" <td>0.00</td>\n",
" <td>-inf</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>AIA MPF - Prime Value Choice</td>\n",
" <td>Guaranteed Portfolio</td>\n",
" <td>Guaranteed</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>Manulife Global Select (MPF) Scheme</td>\n",
" <td>Manulife MPF Interest Fund</td>\n",
" <td>Guaranteed</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.07</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>427 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" provider \\\n",
"193 HSBC Mandatory Provident Fund - SuperTrust Plus \n",
"179 Hang Seng Mandatory Provident Fund - SuperTrus... \n",
"239 Manulife Global Select (MPF) Scheme \n",
"359 SHKP MPF Employer Sponsored Scheme \n",
"267 MASS Mandatory Provident Fund Scheme \n",
".. ... \n",
"218 Sun Life MPF Comprehensive Scheme \n",
"360 SHKP MPF Employer Sponsored Scheme \n",
"18 AIA MPF - Prime Value Choice \n",
"21 AIA MPF - Prime Value Choice \n",
"265 Manulife Global Select (MPF) Scheme \n",
"\n",
" Constitutent Fund Type \\\n",
"193 Age 65 Plus Fund Mixed Assets \n",
"179 Age 65 Plus Fund Mixed Assets \n",
"239 Manulife MPF European Equity Fund Equity \n",
"359 Manulife Career Average Guaranteed Fund - SHKP Guaranteed \n",
"267 US Equity Fund Equity \n",
".. ... ... \n",
"218 Sun Life MPF Comprehensive Scheme MPF Conserva... MPF Conservative \n",
"360 Invesco MPF Conservative Fund MPF Conservative \n",
"18 MPF Conservative Fund MPF Conservative \n",
"21 Guaranteed Portfolio Guaranteed \n",
"265 Manulife MPF Interest Fund Guaranteed \n",
"\n",
" Price 1 year return 6 month return 3 month return \\\n",
"193 12.6702020/01/31 8.57 3.34 1.36 \n",
"179 12.6702020/01/31 8.57 3.34 1.36 \n",
"239 11.9522020/01/31 21.17 8.74 4.83 \n",
"359 13.3742020/01/31 6.27 1.90 0.80 \n",
"267 25.4332020/01/31 23.08 7.25 9.57 \n",
".. ... ... ... ... \n",
"218 11.1602020/01/31 0.18 0.18 0.09 \n",
"360 11.3322020/01/31 1.39 0.80 0.40 \n",
"18 113.8702020/01/31 1.12 0.65 0.29 \n",
"21 NaN NaN NaN NaN \n",
"265 NaN NaN NaN NaN \n",
"\n",
" 1 month return year to date return risk sharp ratio \n",
"193 1.12 1.12 3.15 2.085714 \n",
"179 1.12 1.12 3.15 2.085714 \n",
"239 -1.99 -1.99 11.84 1.619088 \n",
"359 1.38 1.38 2.77 1.541516 \n",
"267 3.77 3.77 13.76 1.531977 \n",
".. ... ... ... ... \n",
"218 0.00 0.00 0.05 -36.400000 \n",
"360 0.15 0.15 0.00 -inf \n",
"18 0.11 0.11 0.00 -inf \n",
"21 NaN NaN 0.00 NaN \n",
"265 NaN NaN 0.07 NaN \n",
"\n",
"[427 rows x 11 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_selected.sort_values('sharp ratio', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mpf-hk",
"language": "python",
"name": "mpf-hk"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment