# davidejones/getlinks.py

## getlinks.py
from html.parser import HTMLParser
import urllib.request


class LinkParser(HTMLParser):
    """HTML parser that collects the ``href`` values of ``<a>`` tags.

    Feed markup with ``feed()``; the values found so far are available in
    ``links``, in the order the tags were encountered.
    """

    def __init__(self, *, convert_charrefs: bool = True) -> None:
        """Create the parser with an empty ``links`` list.

        Args:
            convert_charrefs: Forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(convert_charrefs=convert_charrefs)
        # href values gathered from every <a> start tag seen so far.
        self.links: list[str] = []

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """Record the ``href`` value(s) of an ``<a>`` start tag.

        Args:
            tag: Tag name as passed in by ``HTMLParser``.
            attrs: ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != 'a':
            return
        # A value-less attribute (``<a href>``) is reported with value None;
        # skip it so ``links`` only ever contains strings.
        self.links.extend(
            value for name, value in attrs
            if name == 'href' and value is not None
        )


def main(url: str = 'http://python.org') -> None:
    """Fetch *url* and print the ``href`` values of its ``<a>`` tags.

    Args:
        url: Address of the page to download. Defaults to
            ``'http://python.org'``, the page this script originally targeted.

    The page is parsed only when the response status is 200. Nothing is
    returned; the collected links are printed as a list.
    """
    with urllib.request.urlopen(url) as r:
        if r.status != 200:
            return
        body = r.read()
        # Charset declared in the Content-Type header, if any.
        charset = r.headers.get_content_charset() or 'utf-8'

    # read() returns bytes. str(bytes) would produce the "b'...'" repr with
    # escaped newlines and quotes rather than the markup, so decode instead.
    try:
        html = body.decode(charset, errors='replace')
    except LookupError:
        # The server announced an encoding Python does not know.
        html = body.decode('utf-8', errors='replace')

    parser = LinkParser()
    parser.feed(html)
    print(parser.links)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	from html.parser import HTMLParser
	import urllib.request


	class LinkParser(HTMLParser):
	    """HTML parser that collects the ``href`` values of ``<a>`` tags.

	    Feed markup with ``feed()``; the values found so far are available in
	    ``links``, in the order the tags were encountered.
	    """

	    def __init__(self, *, convert_charrefs: bool = True) -> None:
	        """Create the parser with an empty ``links`` list.

	        Args:
	            convert_charrefs: Forwarded unchanged to ``HTMLParser``.
	        """
	        super().__init__(convert_charrefs=convert_charrefs)
	        # href values gathered from every <a> start tag seen so far.
	        self.links: list[str] = []

	    def handle_starttag(self, tag: str, attrs: list) -> None:
	        """Record the ``href`` value(s) of an ``<a>`` start tag.

	        Args:
	            tag: Tag name as passed in by ``HTMLParser``.
	            attrs: ``(name, value)`` pairs for the tag's attributes.
	        """
	        if tag != 'a':
	            return
	        # A value-less attribute (``<a href>``) is reported with value
	        # None; skip it so ``links`` only ever contains strings.
	        self.links.extend(
	            value for name, value in attrs
	            if name == 'href' and value is not None
	        )


	def main(url: str = 'http://python.org') -> None:
	    """Fetch *url* and print the ``href`` values of its ``<a>`` tags.

	    Args:
	        url: Address of the page to download. Defaults to
	            ``'http://python.org'``, the page this script originally
	            targeted.

	    The page is parsed only when the response status is 200. Nothing is
	    returned; the collected links are printed as a list.
	    """
	    with urllib.request.urlopen(url) as r:
	        if r.status != 200:
	            return
	        body = r.read()
	        # Charset declared in the Content-Type header, if any.
	        charset = r.headers.get_content_charset() or 'utf-8'

	    # read() returns bytes. str(bytes) would produce the "b'...'" repr
	    # with escaped newlines and quotes rather than the markup, so decode.
	    try:
	        html = body.decode(charset, errors='replace')
	    except LookupError:
	        # The server announced an encoding Python does not know.
	        html = body.decode('utf-8', errors='replace')

	    parser = LinkParser()
	    parser.feed(html)
	    print(parser.links)


	# Run the scraper only when this file is executed as a script.
	if __name__ == '__main__':
	    main()