Skip to content

Instantly share code, notes, and snippets.

@snippsat
Created June 16, 2019 02:11
Show Gist options
  • Save snippsat/7b1e7be93795a974ab228d15023d467f to your computer and use it in GitHub Desktop.
Save snippsat/7b1e7be93795a974ab228d15023d467f to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"#from tabulate import tabulate\n",
"\n",
"res = requests.get(\"http://web.archive.org/web/20070701133815/http://www.bbmf.co.uk/september07.html\")\n",
"soup = BeautifulSoup(res.content,'lxml')\n",
"table = soup.find_all('table')[0]\n",
"\n",
"df = pd.read_html(str(table))\n",
"#print( tabulate(df[0], headers='keys', tablefmt='psql') )"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Date</td>\n",
" <td>Location</td>\n",
" <td>Lancaster</td>\n",
" <td>Spitfire</td>\n",
" <td>Hurricane</td>\n",
" <td>Dakota</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>Fort Nelson</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>Lydd - Display</td>\n",
" <td>L</td>\n",
" <td>S</td>\n",
" <td>H</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>Shackerstone - Display</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 \\\n",
"0 Date Location Lancaster Spitfire Hurricane \n",
"1 September September September September September \n",
"2 1 Fort Nelson NaN S NaN \n",
"3 NaN Lydd - Display L S H \n",
"4 NaN Shackerstone - Display NaN S NaN \n",
"\n",
" 5 \n",
"0 Dakota \n",
"1 September \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df[1]\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"df = df.rename(columns=df.iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Location</th>\n",
" <th>Lancaster</th>\n",
" <th>Spitfire</th>\n",
" <th>Hurricane</th>\n",
" <th>Dakota</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Date</td>\n",
" <td>Location</td>\n",
" <td>Lancaster</td>\n",
" <td>Spitfire</td>\n",
" <td>Hurricane</td>\n",
" <td>Dakota</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" <td>September</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>Fort Nelson</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>Lydd - Display</td>\n",
" <td>L</td>\n",
" <td>S</td>\n",
" <td>H</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>Shackerstone - Display</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Location Lancaster Spitfire Hurricane \\\n",
"0 Date Location Lancaster Spitfire Hurricane \n",
"1 September September September September September \n",
"2 1 Fort Nelson NaN S NaN \n",
"3 NaN Lydd - Display L S H \n",
"4 NaN Shackerstone - Display NaN S NaN \n",
"\n",
" Dakota \n",
"0 Dakota \n",
"1 September \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"df = df[(df['Location'] != \"\") & (df['Spitfire'] == 'S')]"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"df = df.dropna(axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Location</th>\n",
" <th>Spitfire</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Fort Nelson</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Lydd - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Shackerstone - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Rye Meadows</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Alfrick</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Lydd - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Chart Sutton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Gedling</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Darley</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>RAF Wittering</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>RAF Honington</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>RAF Leeming</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Penistone</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Menwith Hill</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Leuchars Charity Ball - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Duxford - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Faldingworth</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Donington Park</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Cliveden</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Snetterton - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Duxford - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Cliveden</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Kemble - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Donington Park</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Morecambe - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>King Lynn</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>Norwich County Hall</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>Norwich City Hall</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>Odiham</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Shawbury</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>RAF Halton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>RAF Wittering</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>RAF Coningsby</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>RAF Leeming</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>RAF Kirkton in Lindsey</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>Middle Wallop - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Shoreham - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>Chichester</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>Boston</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Sheringham</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>Costessey</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>Bentley Priory - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>Little Casterton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>Collingham</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>Shoreham - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>Newhaven Fort</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Staplehurst</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>Weald of Kent</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>Long Sutton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Sheringham</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>Costessey</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>Norwich Cathedral</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>Boston</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>Shrivenham</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>RAF Scampton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>RAF Digby</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Hanley Stoke on Trent</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>Southport - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>Southport - Display</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>Flixton</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>65 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Location Spitfire\n",
"2 Fort Nelson S\n",
"3 Lydd - Display S\n",
"4 Shackerstone - Display S\n",
"5 Rye Meadows S\n",
"6 Alfrick S\n",
"7 Lydd - Display S\n",
"8 Chart Sutton S\n",
"9 Gedling S\n",
"13 Darley S\n",
"14 RAF Wittering S\n",
"15 RAF Honington S\n",
"16 RAF Leeming S\n",
"17 Penistone S\n",
"18 Menwith Hill S\n",
"19 Leuchars Charity Ball - Display S\n",
"24 Duxford - Display S\n",
"25 Faldingworth S\n",
"26 Donington Park S\n",
"27 Cliveden S\n",
"28 Snetterton - Display S\n",
"29 Duxford - Display S\n",
"31 Cliveden S\n",
"32 Kemble - Display S\n",
"33 Donington Park S\n",
"34 Morecambe - Display S\n",
"37 King Lynn S\n",
"38 Norwich County Hall S\n",
"39 Norwich City Hall S\n",
"40 Odiham S\n",
"41 Shawbury S\n",
".. ... ...\n",
"47 RAF Halton S\n",
"48 RAF Wittering S\n",
"49 RAF Coningsby S\n",
"50 RAF Leeming S\n",
"51 RAF Kirkton in Lindsey S\n",
"52 Middle Wallop - Display S\n",
"53 Shoreham - Display S\n",
"56 Chichester S\n",
"57 Boston S\n",
"58 Sheringham S\n",
"59 Costessey S\n",
"61 Bentley Priory - Display S\n",
"62 Little Casterton S\n",
"63 Collingham S\n",
"66 Shoreham - Display S\n",
"67 Newhaven Fort S\n",
"68 Staplehurst S\n",
"69 Weald of Kent S\n",
"72 Long Sutton S\n",
"73 Sheringham S\n",
"74 Costessey S\n",
"75 Norwich Cathedral S\n",
"76 Boston S\n",
"77 Shrivenham S\n",
"80 RAF Scampton S\n",
"81 RAF Digby S\n",
"86 Hanley Stoke on Trent S\n",
"87 Southport - Display S\n",
"88 Southport - Display S\n",
"89 Flixton S\n",
"\n",
"[65 rows x 2 columns]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Location</th>\n",
" <th>Spitfire</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>Boston</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>Boston</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Location Spitfire\n",
"57 Boston S\n",
"76 Boston S"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['Location'] == 'Boston') & (df['Spitfire'] == 'S')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment