Created
July 2, 2013 21:10
-
-
Save meikj/5913166 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Udacity CS101
Lesson 1, Homework 1

Extract the hyperlink URLs from an HTML page by scanning for
``<a href=`` tags; PAGE below is the sample document used by main().
"""

# Sample HTML document (three hyperlinks) used as input by main().
# The literal starts and ends with a newline and holds the markup on a
# single line.
PAGE = '''
<html xmlns="http://www.w3.org/1999/xhtml"><br/> <head><br/><title>Udacity</title> <br/></head><br/><br/><body> <br/><h1>Udacity</h1><br/><br/> <p><b>Udacity</b> is a private institution of <a href="http://www.wikipedia.org/wiki/Higher_education"> higher education founded by</a> <a href="http://www.wikipedia.org/wiki/Sebastian_Thrun">Sebastian Thrun</a>, David Stavens, and Mike Sokolsky with the goal to provide university-level education that is "both high quality and low cost".<br/>It is the outgrowth of a free computer science class offered in 2011 through Stanford University. Currently, Udacity is working on its second course on building a search engine. Udacity was announced at the 2012 <a href="http://www.wikipedia.org/wiki/Digital_Life_Design">Digital Life Design</a> conference.</p><br/></body><br/></html>
'''
def fetch_link(page, start=0):
    """Fetch the URL of the next hyperlink in an HTML page.

    Searches ``page`` for the first ``<a href=`` at or after index
    ``start`` and extracts the text between the next pair of double
    quotes.

    Args:
        page: The HTML text to search.
        start: Index in ``page`` at which to begin searching.

    Returns:
        A pair ``(url, next_pos)`` where ``next_pos`` is the index of the
        URL's closing quote, suitable as ``start`` for the next call.
        Returns ``('', -1)`` when no further link is found or when the
        link's quotes are missing or unterminated.
    """
    start_link = page.find('<a href=', start)
    # Compare by value: ``is not -1`` tests object identity, which only
    # happens to work for small ints on CPython.
    if start_link == -1:
        return ('', -1)

    opening_quote = page.find('"', start_link)
    if opening_quote == -1:
        # Without this guard ``-1 + 1`` becomes 0 and the scan would
        # silently restart from the beginning of the page.
        return ('', -1)

    url_start = opening_quote + 1
    url_end = page.find('"', url_start)
    if url_end == -1:
        # Unterminated attribute value: there is no well-formed URL.
        return ('', -1)

    return (page[url_start:url_end], url_end)
def main():
    """Example usage of fetch_link() using PAGE.

    Repeatedly calls fetch_link() on PAGE, feeding each returned
    position back in as the next start index, and prints every URL with
    its position until fetch_link() signals exhaustion with -1.
    """
    next_pos = 0
    while True:
        url, next_pos = fetch_link(PAGE, next_pos)
        # Value comparison; ``is not -1`` relied on int object identity.
        if next_pos == -1:
            break
        print('url = %s, next_pos = %d' % (url, next_pos))
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment