# Skip to content
#
# Instantly share code, notes, and snippets.
#
# Last active Feb 11, 2020
# What would you like to do?
import requests
from bs4 import BeautifulSoup
import re
from collections import namedtuple
# Record for one parsed list entry: the text before the first parenthesis
# (`name`) and the text inside it (`age_range`), as produced by parse_line.
LineContent = namedtuple(typename='LineContent', field_names='name age_range')
def get_url(url, timeout=10):
    """Fetch *url* and return the response body if it is an HTML page.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        ``response.content`` when the status code is 200 and the
        ``Content-Type`` header mentions ``html``; otherwise ``None``.
        ``None`` is also returned (after printing the error) when the
        request itself fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print('error when accessing', url, '\n', e)
        return None

    # The header may be absent; treat that the same as "not HTML" instead
    # of raising KeyError.
    content_type = response.headers.get('Content-Type', '').lower()
    if response.status_code == 200 and 'html' in content_type:
        return response.content
    return None
def extract_data(response_content):
    """Parse an HTML document and report every relevant list entry.

    The document is parsed with BeautifulSoup's ``html.parser``, the
    entries selected by ``get_relevant_lines`` are collected, and each one
    is handed to ``print_line_if_age_under_60``.

    Args:
        response_content: HTML markup accepted by ``BeautifulSoup``.
    """
    raw_html = BeautifulSoup(response_content, 'html.parser')
    for line in get_relevant_lines(raw_html):
        print_line_if_age_under_60(line)
def print_line_if_age_under_60(line):
    """Print the person's name and age when they died younger than 60.

    The line is parsed with ``parse_line`` and the age computed with
    ``get_age``. Lines that cannot be parsed are reported on stdout and
    skipped rather than aborting the whole run.

    Args:
        line: Text of one list entry, e.g. ``"Name (1802–1829), note"``.
    """
    try:
        parsed_line = parse_line(line)
        age = get_age(parsed_line)
    except (IndexError, ValueError) as e:
        # IndexError: no parenthesised part / no year separator found.
        # ValueError: the year fields are not integers.
        print('could not parse line:', line, e)
        return

    if age < 60:
        print(parsed_line.name, 'died at', age)
def get_relevant_lines(raw_html):
    """Return the text of every ``<li>`` that contains a ``(YYYY-YYYY)`` span.

    Both the ASCII hyphen and the en dash are accepted between the two
    four-digit years.

    Args:
        raw_html: Parsed document exposing ``find_all`` (a BeautifulSoup
            object in this script).

    Returns:
        A list of the matching elements' ``.text`` values, in document order.
    """
    relevant_lines = []
    # NOTE(review): the original iterable expression was lost in the paste;
    # find_all('li') is assumed from the surviving 'li' argument — confirm.
    for element in raw_html.find_all('li'):
        current_line = element.text
        # Raw string so that \( and \d are regex escapes, not (invalid)
        # string escapes.
        if re.match(r'.*\(\d{4}(–|-)\d{4}\).*', current_line):
            relevant_lines.append(current_line)
    return relevant_lines
def parse_line(line):
    """Split a list entry into a name and the text of its first parenthesis.

    Args:
        line: Entry text shaped like ``"name (year-year), suffix"``.

    Returns:
        ``LineContent(name, age_range)`` where ``name`` is the text before
        the first ``(`` and ``age_range`` is the text between that ``(``
        and the next ``)``, both with surrounding whitespace removed.

    Raises:
        IndexError: If *line* contains no ``(``.
    """
    # "name (year-year), suffix" -> ["name ", "year-year), suffix"]
    contents = line.split('(')
    # Strip the space that precedes the parenthesis so the name prints cleanly.
    name = contents[0].strip()
    # "year-year), suffix" -> ["year-year", ", suffix"]
    remaining_contents = contents[1].split(')')
    age_range = remaining_contents[0].strip()
    return LineContent(name, age_range)
def get_age(parsed_line):
    """Return the difference between the two years in ``age_range``.

    Args:
        parsed_line: Object with an ``age_range`` attribute holding two
            years separated by a hyphen (``-``) or an en dash (``–``).

    Returns:
        The second year minus the first, as an ``int``.

    Raises:
        IndexError: If no separator is present.
        ValueError: If either part is not an integer.
    """
    # Normalise the en dash to a hyphen so one split handles both forms.
    # Previously the en-dash split always overwrote the hyphen split, so
    # hyphenated ranges failed.
    years = parsed_line.age_range.replace('–', '-').split('-')
    return int(years[1]) - int(years[0])
if __name__ == '__main__':
    # NOTE(review): the URL literal was truncated in the pasted source (only
    # "...maticians_born_in_the_19th_century" survived). It is reconstructed
    # here as the English Wikipedia list page — confirm before relying on it.
    response_content = get_url(
        'https://en.wikipedia.org/wiki/'
        'List_of_mathematicians_born_in_the_19th_century'
    )
    # get_url returns None on any failure or non-HTML response.
    if response_content is not None:
        extract_data(response_content)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment