Skip to content

Instantly share code, notes, and snippets.

@wvdschel
Created September 13, 2016 16:23
Show Gist options
  • Save wvdschel/4c5bc6b9c644a333018b8e1ff67c343a to your computer and use it in GitHub Desktop.
Save wvdschel/4c5bc6b9c644a333018b8e1ff67c343a to your computer and use it in GitHub Desktop.
import urllib.request
from html.parser import HTMLParser
class DilbertParser(HTMLParser):
    """HTML parser that prints the source URL of every comic image it sees.

    An ``<img>`` tag counts as a comic image when its ``class`` attribute
    contains the text ``img-comic``.
    """

    def handle_starttag(self, tag, attrs):
        """Print the ``src`` of *tag* when it is a comic ``<img>`` element.

        Args:
            tag: Tag name as passed in by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag; ``value`` is
                ``None`` for an attribute written without a value.
        """
        if tag != "img":
            return

        comic = False
        src = None
        for name, value in attrs:
            # A bare attribute such as ``<img class>`` is reported with a
            # value of None; guard so the substring test cannot raise
            # TypeError and abort the whole parse.
            if name == "class" and value is not None and "img-comic" in value:
                comic = True
            elif name == "src":
                src = value

        if comic:
            # src stays None when the tag carries no src attribute.
            print("Dilbert comic image: %s" % src)
# Fetch the Dilbert front page and report every comic image found in it.
with urllib.request.urlopen('http://dilbert.com') as f:
    # Decode the body with the charset announced in the Content-Type header
    # (UTF-8 when none is given). Calling str() on the raw bytes would feed
    # the parser the "b'...'" repr, with escaped quotes, newlines and
    # non-ASCII bytes, instead of the actual markup.
    charset = f.headers.get_content_charset() or "utf-8"
    parser = DilbertParser()
    parser.feed(f.read().decode(charset, errors="replace"))
    # Flush any input the parser is still buffering.
    parser.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment