Skip to content

Instantly share code, notes, and snippets.

@nk9
Last active July 11, 2016 21:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nk9/cf7d79d981ce606ce8059973144bbfea to your computer and use it in GitHub Desktop.
Save nk9/cf7d79d981ce606ce8059973144bbfea to your computer and use it in GitHub Desktop.
Mis-directed links in Ada
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
# Scan each annotated Ada chapter page and report every anchor whose href
# contains a fragment ("#") but which carries no "target" attribute.
# Output: one "\n === <page url>" header per page that has at least one such
# link, followed by one "<href>\t<link text>" line per link.
urlFormat = "http://www.ada.auckland.ac.nz/ada1%d.htm"
# Only chapters 1-39 are annotated.
urls = [urlFormat % x for x in range(1, 40)]

for url in urls:
    response = requests.get(url)
    # Parse the raw response body with the stdlib-backed HTML parser.
    soup = BeautifulSoup(response.content, "html.parser")
    links = soup.find_all('a')  # every <a> tag in the document

    # The page header is printed lazily so pages without hits stay silent.
    printedHeader = False
    for tag in links:
        link = tag.get('href')
        target = tag.get('target')
        # Only anchors that have an href with a fragment and no explicit
        # target attribute are reported; everything else is skipped.
        if link is None or target is not None or "#" not in link:
            continue
        if not printedHeader:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("\n === " + url)
            printedHeader = True
        print(link + "\t" + tag.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment