Skip to content

Instantly share code, notes, and snippets.

@patricoferris
Created September 19, 2018 12:42
Show Gist options
  • Save patricoferris/4c953c7cb640ad2bffae9e2f7c731254 to your computer and use it in GitHub Desktop.
Save patricoferris/4c953c7cb640ad2bffae9e2f7c731254 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import json
# Build a mapping from factbook country code to the continent (region)
# directory that contains it, by scraping the GitHub file listing of the
# factbook/factbook.json repository.
#
# NOTE(review): the scraping relies on GitHub rendering each file/directory
# row as a <td class="content"> cell whose text has the entry name on its
# second line -- confirm this still matches the live page markup.

# Seconds to wait for GitHub before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Accessing the webpage
result = requests.get('https://github.com/factbook/factbook.json',
                      timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
result.raise_for_status()
# Getting the content of the webpage
content = result.content
# Beautiful-souping it
bsoup = BeautifulSoup(content, 'html5lib')
# Getting the table data cells from the page with a specific class
# (found with Chrome Dev Tools)
tds = bsoup.find_all('td', class_='content')

# A list of the possible continents: the last space-separated token on the
# second text line of each cell.
# The slice skips the first cell and keeps at most 13 entries -- presumably
# the first cell is not a directory row; TODO confirm against the page.
continents = [td.text.split('\n')[1].split(' ')[-1] for td in tds[1:14]]

# Key-value pairs of country code to their respective continent
code_to_continent = {}
for continent in continents:
    # 'meta' is excluded from the mapping (presumably it is not a region
    # directory -- verify against the repository layout).
    if continent == 'meta':
        continue

    url = 'https://github.com/factbook/factbook.json/tree/master/{c}'.format(c=continent)
    countries_page = requests.get(url, timeout=REQUEST_TIMEOUT)
    countries_page.raise_for_status()
    content = countries_page.content
    bsoup = BeautifulSoup(content, 'html5lib')
    tds = bsoup.find_all('td', class_='content')

    # Skip the first cell, then take each entry's file name and drop
    # everything from the first '.' onward to get the bare country code.
    for td in tds[1:]:
        code_to_continent[td.text.split('\n')[1].split(' ')[-1].split('.')[0]] = continent
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment