Skip to content

Instantly share code, notes, and snippets.

@hyunsikhwang
Created February 13, 2021 12:00
Show Gist options
  • Save hyunsikhwang/a7d22d7a7236a156bb8fc17bb98d6374 to your computer and use it in GitHub Desktop.
Save hyunsikhwang/a7d22d7a7236a156bb8fc17bb98d6374 to your computer and use it in GitHub Desktop.
Bloomberg Billionaires Index.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Bloomberg Billionaires Index.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNMfdzd8vKx3n1bMH6p1oLP",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/hyunsikhwang/a7d22d7a7236a156bb8fc17bb98d6374/bloomberg-billionaires-index.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 455
},
"id": "5zy8C-o1f0J-",
"outputId": "7344c532-edf6-4481-fc75-406be910d635"
},
"source": [
"%%time\r\n",
"\r\n",
"import requests\r\n",
"import bs4\r\n",
"import re\r\n",
"import pandas as pd\r\n",
"\r\n",
"\r\n",
"url = 'https://www.bloomberg.com/billionaires/'\r\n",
"\r\n",
"def get_bs(url):\r\n",
" #headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}\r\n",
" headers = {\r\n",
"\t'Host': 'www.bloomberg.com',\r\n",
"\t'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',\r\n",
"\t'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\r\n",
"\t'Accept-Language': 'en-US,en;q=0.5',\r\n",
"\t'Accept-Encoding': 'gzip, deflate, br',\r\n",
"\t'Upgrade-Insecure-Requests': '1',\r\n",
"\t'Connection': 'keep-alive',\r\n",
"\t'Pragma': 'no-cache',\r\n",
"\t'Cache-Control': 'no-cache',\r\n",
"\t'TE': 'Trailers'}\r\n",
" \r\n",
" return bs4.BeautifulSoup(requests.get(url, headers=headers).text, \"lxml\")\r\n",
"\r\n",
"divList = get_bs(url).findAll(\"div\", attrs={\"class\": re.compile(\"table-row\")})\r\n",
"\r\n",
"dataLst = []\r\n",
"for div in divList:\r\n",
" rows = div.text.strip().split('\\n')\r\n",
" rows = [x.strip(' ') for x in rows]\r\n",
" rows = list(filter(None, rows))\r\n",
" dataLst.append(rows)\r\n",
"\r\n",
"\r\n",
"df = pd.DataFrame(dataLst)\r\n",
"df.columns = df.iloc[0]\r\n",
"df = df[1:]\r\n",
"print(df)\r\n",
"\r\n"
],
"execution_count": 114,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Name</th>\n",
" <th>Total net worth</th>\n",
" <th>$ Last change</th>\n",
" <th>$ YTD change</th>\n",
" <th>Country</th>\n",
" <th>Industry</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>Elon Musk</td>\n",
" <td>$195B</td>\n",
" <td>+$1.01B</td>\n",
" <td>+$25.1B</td>\n",
" <td>United States</td>\n",
" <td>Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>Jeff Bezos</td>\n",
" <td>$192B</td>\n",
" <td>+$829M</td>\n",
" <td>+$1.26B</td>\n",
" <td>United States</td>\n",
" <td>Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>Bill Gates</td>\n",
" <td>$137B</td>\n",
" <td>+$508M</td>\n",
" <td>+$5.25B</td>\n",
" <td>United States</td>\n",
" <td>Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>Bernard Arnault</td>\n",
" <td>$114B</td>\n",
" <td>+$37.5M</td>\n",
" <td>-$119M</td>\n",
" <td>France</td>\n",
" <td>Consumer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>Mark Zuckerberg</td>\n",
" <td>$103B</td>\n",
" <td>+$40.5M</td>\n",
" <td>-$954M</td>\n",
" <td>United States</td>\n",
" <td>Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>496</th>\n",
" <td>496</td>\n",
" <td>Zhang Lei</td>\n",
" <td>$5.60B</td>\n",
" <td>+$978k</td>\n",
" <td>-$40.3M</td>\n",
" <td>China</td>\n",
" <td>Finance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>497</td>\n",
" <td>Lin Li</td>\n",
" <td>$5.58B</td>\n",
" <td>$0</td>\n",
" <td>-$516M</td>\n",
" <td>China</td>\n",
" <td>Diversified</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>498</td>\n",
" <td>Charles Butt &amp; family</td>\n",
" <td>$5.57B</td>\n",
" <td>$0</td>\n",
" <td>+$249M</td>\n",
" <td>United States</td>\n",
" <td>Retail</td>\n",
" </tr>\n",
" <tr>\n",
" <th>499</th>\n",
" <td>499</td>\n",
" <td>Naguib Sawiris</td>\n",
" <td>$5.56B</td>\n",
" <td>-$3.35M</td>\n",
" <td>+$78.6M</td>\n",
" <td>Egypt</td>\n",
" <td>Media &amp; Telecom</td>\n",
" </tr>\n",
" <tr>\n",
" <th>500</th>\n",
" <td>500</td>\n",
" <td>Edward Roski</td>\n",
" <td>$5.54B</td>\n",
" <td>$0</td>\n",
" <td>-$282M</td>\n",
" <td>United States</td>\n",
" <td>Real Estate</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>500 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
"0 Rank Name ... Country Industry\n",
"1 1 Elon Musk ... United States Technology\n",
"2 2 Jeff Bezos ... United States Technology\n",
"3 3 Bill Gates ... United States Technology\n",
"4 4 Bernard Arnault ... France Consumer\n",
"5 5 Mark Zuckerberg ... United States Technology\n",
".. ... ... ... ... ...\n",
"496 496 Zhang Lei ... China Finance\n",
"497 497 Lin Li ... China Diversified\n",
"498 498 Charles Butt & family ... United States Retail\n",
"499 499 Naguib Sawiris ... Egypt Media & Telecom\n",
"500 500 Edward Roski ... United States Real Estate\n",
"\n",
"[500 rows x 7 columns]"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"CPU times: user 285 ms, sys: 0 ns, total: 285 ms\n",
"Wall time: 423 ms\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment