Created
February 13, 2021 12:00
-
-
Save hyunsikhwang/a7d22d7a7236a156bb8fc17bb98d6374 to your computer and use it in GitHub Desktop.
Bloomberg Billionaires Index.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Bloomberg Billionaires Index.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyNMfdzd8vKx3n1bMH6p1oLP", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/hyunsikhwang/a7d22d7a7236a156bb8fc17bb98d6374/bloomberg-billionaires-index.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 455 | |
}, | |
"id": "5zy8C-o1f0J-", | |
"outputId": "7344c532-edf6-4481-fc75-406be910d635" | |
}, | |
"source": [ | |
"%%time\r\n", | |
"\r\n", | |
"import requests\r\n", | |
"import bs4\r\n", | |
"import re\r\n", | |
"import pandas as pd\r\n", | |
"\r\n", | |
"\r\n", | |
"url = 'https://www.bloomberg.com/billionaires/'\r\n", | |
"\r\n", | |
"def get_bs(url):\r\n", | |
" #headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}\r\n", | |
" headers = {\r\n", | |
"\t'Host': 'www.bloomberg.com',\r\n", | |
"\t'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',\r\n", | |
"\t'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\r\n", | |
"\t'Accept-Language': 'en-US,en;q=0.5',\r\n", | |
"\t'Accept-Encoding': 'gzip, deflate, br',\r\n", | |
"\t'Upgrade-Insecure-Requests': '1',\r\n", | |
"\t'Connection': 'keep-alive',\r\n", | |
"\t'Pragma': 'no-cache',\r\n", | |
"\t'Cache-Control': 'no-cache',\r\n", | |
"\t'TE': 'Trailers'}\r\n", | |
" \r\n", | |
" return bs4.BeautifulSoup(requests.get(url, headers=headers).text, \"lxml\")\r\n", | |
"\r\n", | |
"divList = get_bs(url).findAll(\"div\", attrs={\"class\": re.compile(\"table-row\")})\r\n", | |
"\r\n", | |
"dataLst = []\r\n", | |
"for div in divList:\r\n", | |
" rows = div.text.strip().split('\\n')\r\n", | |
" rows = [x.strip(' ') for x in rows]\r\n", | |
" rows = list(filter(None, rows))\r\n", | |
" dataLst.append(rows)\r\n", | |
"\r\n", | |
"\r\n", | |
"df = pd.DataFrame(dataLst)\r\n", | |
"df.columns = df.iloc[0]\r\n", | |
"df = df[1:]\r\n", | |
"print(df)\r\n", | |
"\r\n" | |
], | |
"execution_count": 114, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Rank</th>\n", | |
" <th>Name</th>\n", | |
" <th>Total net worth</th>\n", | |
" <th>$ Last change</th>\n", | |
" <th>$ YTD change</th>\n", | |
" <th>Country</th>\n", | |
" <th>Industry</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>Elon Musk</td>\n", | |
" <td>$195B</td>\n", | |
" <td>+$1.01B</td>\n", | |
" <td>+$25.1B</td>\n", | |
" <td>United States</td>\n", | |
" <td>Technology</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>Jeff Bezos</td>\n", | |
" <td>$192B</td>\n", | |
" <td>+$829M</td>\n", | |
" <td>+$1.26B</td>\n", | |
" <td>United States</td>\n", | |
" <td>Technology</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>Bill Gates</td>\n", | |
" <td>$137B</td>\n", | |
" <td>+$508M</td>\n", | |
" <td>+$5.25B</td>\n", | |
" <td>United States</td>\n", | |
" <td>Technology</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>Bernard Arnault</td>\n", | |
" <td>$114B</td>\n", | |
" <td>+$37.5M</td>\n", | |
" <td>-$119M</td>\n", | |
" <td>France</td>\n", | |
" <td>Consumer</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>5</td>\n", | |
" <td>Mark Zuckerberg</td>\n", | |
" <td>$103B</td>\n", | |
" <td>+$40.5M</td>\n", | |
" <td>-$954M</td>\n", | |
" <td>United States</td>\n", | |
" <td>Technology</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>496</th>\n", | |
" <td>496</td>\n", | |
" <td>Zhang Lei</td>\n", | |
" <td>$5.60B</td>\n", | |
" <td>+$978k</td>\n", | |
" <td>-$40.3M</td>\n", | |
" <td>China</td>\n", | |
" <td>Finance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>497</th>\n", | |
" <td>497</td>\n", | |
" <td>Lin Li</td>\n", | |
" <td>$5.58B</td>\n", | |
" <td>$0</td>\n", | |
" <td>-$516M</td>\n", | |
" <td>China</td>\n", | |
" <td>Diversified</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>498</th>\n", | |
" <td>498</td>\n", | |
" <td>Charles Butt & family</td>\n", | |
" <td>$5.57B</td>\n", | |
" <td>$0</td>\n", | |
" <td>+$249M</td>\n", | |
" <td>United States</td>\n", | |
" <td>Retail</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>499</th>\n", | |
" <td>499</td>\n", | |
" <td>Naguib Sawiris</td>\n", | |
" <td>$5.56B</td>\n", | |
" <td>-$3.35M</td>\n", | |
" <td>+$78.6M</td>\n", | |
" <td>Egypt</td>\n", | |
" <td>Media & Telecom</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>500</th>\n", | |
" <td>500</td>\n", | |
" <td>Edward Roski</td>\n", | |
" <td>$5.54B</td>\n", | |
" <td>$0</td>\n", | |
" <td>-$282M</td>\n", | |
" <td>United States</td>\n", | |
" <td>Real Estate</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>500 rows × 7 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"0 Rank Name ... Country Industry\n", | |
"1 1 Elon Musk ... United States Technology\n", | |
"2 2 Jeff Bezos ... United States Technology\n", | |
"3 3 Bill Gates ... United States Technology\n", | |
"4 4 Bernard Arnault ... France Consumer\n", | |
"5 5 Mark Zuckerberg ... United States Technology\n", | |
".. ... ... ... ... ...\n", | |
"496 496 Zhang Lei ... China Finance\n", | |
"497 497 Lin Li ... China Diversified\n", | |
"498 498 Charles Butt & family ... United States Retail\n", | |
"499 499 Naguib Sawiris ... Egypt Media & Telecom\n", | |
"500 500 Edward Roski ... United States Real Estate\n", | |
"\n", | |
"[500 rows x 7 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 285 ms, sys: 0 ns, total: 285 ms\n", | |
"Wall time: 423 ms\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment