# for sending HTTP GET requests
import requests
# for parsing the response into a searchable format
from bs4 import BeautifulSoup

# Fetch the page. Without an explicit timeout, requests waits indefinitely
# for an unresponsive server, so bound the wait.
r = requests.get(
    'https://en.wikipedia.org/wiki/List_of_national_anthems',
    timeout=30,
)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
r.raise_for_status()
# parse the raw response bytes into a searchable object
soup = BeautifulSoup(r.content, 'html5lib')
# collect every <tr> element in the document
# (find_all is the current name of the deprecated findAll alias)
trs = soup.find_all('tr')
# the country name can be parsed from a row like this
def get_country(tr):
    """Return the text of the first ``<a>`` element inside *tr*.

    Args:
        tr: A parsed element exposing ``find_all`` (e.g. a BeautifulSoup
            ``<tr>`` tag). Presumably the first link in a row is the
            country name -- verify against the page layout.

    Returns:
        The link text, or ``'No country'`` when *tr* contains no ``<a>``
        element or is not a searchable element (e.g. ``None``).
    """
    try:
        return tr.find_all('a')[0].text
    # Only the expected "nothing to parse" failures are handled: an empty
    # result list (IndexError) or a non-element such as None
    # (AttributeError). A bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and hide genuine bugs.
    except (IndexError, AttributeError):
        return 'No country'
# other attributes can be parsed from a row's cells like this
def get_nth_td_text(tr, n: int) -> str:
    """Return the text of the *n*-th ``<td>`` cell inside *tr*.

    Args:
        tr: A parsed element exposing ``find_all`` (e.g. a BeautifulSoup
            ``<tr>`` tag).
        n: Zero-based index of the cell; negative values index from the
            end, as with any list.

    Returns:
        The cell text, or ``'N/A'`` when the row has no cell at index *n*
        or *tr* is not a searchable element (e.g. ``None``).
    """
    try:
        return tr.find_all('td')[n].text
    # Only the expected "cell missing" failures are handled: an index past
    # the end of the row (IndexError) or a non-element such as None
    # (AttributeError). A bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and hide genuine bugs.
    except (IndexError, AttributeError):
        return 'N/A'
# (Web-page footer captured with the snippet, not part of the script:
# "Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment")