Skip to content

Instantly share code, notes, and snippets.

@snippsat
Created October 17, 2019 10:02
Show Gist options
  • Save snippsat/03430e968ab3569ad7c722a875bfdb5a to your computer and use it in GitHub Desktop.
Save snippsat/03430e968ab3569ad7c722a875bfdb5a to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_html('https://www.basketball-reference.com/boxscores/201810160BOS.html', header=1)\n",
"len(df) # How many tables there are on site"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Starters</th>\n",
" <th>MP</th>\n",
" <th>FG</th>\n",
" <th>FGA</th>\n",
" <th>FG%</th>\n",
" <th>3P</th>\n",
" <th>3PA</th>\n",
" <th>3P%</th>\n",
" <th>FT</th>\n",
" <th>FTA</th>\n",
" <th>...</th>\n",
" <th>ORB</th>\n",
" <th>DRB</th>\n",
" <th>TRB</th>\n",
" <th>AST</th>\n",
" <th>STL</th>\n",
" <th>BLK</th>\n",
" <th>TOV</th>\n",
" <th>PF</th>\n",
" <th>PTS</th>\n",
" <th>+/-</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ben Simmons</td>\n",
" <td>11:11</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>.500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>-5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Joel Embiid</td>\n",
" <td>10:33</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>.333</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>.000</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>-5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Robert Covington</td>\n",
" <td>7:41</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Markelle Fultz</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Dario Šarić</td>\n",
" <td>7:25</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>.000</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>.000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>+1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Starters MP FG FGA FG% 3P 3PA 3P% FT FTA ... \\\n",
"0 Ben Simmons 11:11 2 4 .500 0 0 NaN 3 7 ... \n",
"1 Joel Embiid 10:33 2 6 .333 0 2 .000 3 4 ... \n",
"2 Robert Covington 7:41 0 0 NaN 0 0 NaN 0 0 ... \n",
"3 Markelle Fultz NaN NaN NaN NaN NaN NaN NaN NaN NaN ... \n",
"4 Dario Šarić 7:25 0 3 .000 0 2 .000 0 0 ... \n",
"\n",
" ORB DRB TRB AST STL BLK TOV PF PTS +/- \n",
"0 0 2 2 1 1 1 1 4 7 -5 \n",
"1 0 3 3 1 0 0 1 1 7 -5 \n",
"2 0 2 2 0 0 0 0 0 0 -6 \n",
"3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"4 0 1 1 1 0 0 0 0 0 +1 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Take out table 5\n",
"df = df[5]\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Starters object\n",
"MP object\n",
"FG object\n",
"FGA object\n",
"FG% object\n",
"3P object\n",
"3PA object\n",
"3P% object\n",
"FT object\n",
"FTA object\n",
"FT% object\n",
"ORB object\n",
"DRB object\n",
"TRB object\n",
"AST object\n",
"STL object\n",
"BLK object\n",
"TOV object\n",
"PF object\n",
"PTS object\n",
"+/- object\n",
"dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look at types at start to clean up\n",
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment