from contextlib import closing

try:
    from urllib.request import urlopen
except ImportError:  # Python 2 fallback
    from urllib2 import urlopen

from bs4 import BeautifulSoup


def get_lc_term_name(lc_auth_number):
    """Return the heading of a LoC name-authority page as UTF-8 bytes.

    The authority id is inserted into the ``id.loc.gov`` names URL template,
    the resulting HTML page is downloaded, and the text of its first
    ``<h1>`` element is returned.

    Args:
        lc_auth_number: Authority identifier substituted into the URL
            (anything ``str.format`` can render).

    Returns:
        The text of the page's first ``<h1>`` tag, encoded as UTF-8.

    Raises:
        AttributeError: If the downloaded page contains no ``<h1>`` tag.
        Any exception raised by ``urlopen`` for network/HTTP failures is
        propagated unchanged.
    """
    # Create the LoC address by inserting the auth id into a template.
    lc_template = "http://id.loc.gov/authorities/names/{0}.html"
    lc_address = lc_template.format(lc_auth_number)

    # Fetch the HTML; ``closing`` guarantees the connection is released on
    # both Python 2 and 3, even if ``read()`` raises.
    with closing(urlopen(lc_address)) as response:
        html = response.read()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's parser warning).
    soup = BeautifulSoup(html, "html.parser")

    # The info we want is in the first <h1> tag.
    heading = soup.h1
    if heading is None:
        # Keep AttributeError (what ``None.text`` used to raise) so existing
        # callers' except clauses still match, but say what went wrong.
        raise AttributeError(
            "no <h1> element found at {0}".format(lc_address)
        )

    return heading.text.encode("utf-8")