Skip to content

Instantly share code, notes, and snippets.

@meikj
Created July 2, 2013 21:10
Show Gist options
  • Save meikj/5913166 to your computer and use it in GitHub Desktop.
Save meikj/5913166 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
'''
Udacity CS101
Lesson 1, Homework 1
'''
# Sample HTML document used by main() to demonstrate fetch_link(); it
# contains three <a href="..."> hyperlinks.
PAGE = '''
<html xmlns="http://www.w3.org/1999/xhtml"><br/> <head><br/><title>Udacity</title> <br/></head><br/><br/><body> <br/><h1>Udacity</h1><br/><br/> <p><b>Udacity</b> is a private institution of <a href="http://www.wikipedia.org/wiki/Higher_education"> higher education founded by</a> <a href="http://www.wikipedia.org/wiki/Sebastian_Thrun">Sebastian Thrun</a>, David Stavens, and Mike Sokolsky with the goal to provide university-level education that is "both high quality and low cost".<br/>It is the outgrowth of a free computer science class offered in 2011 through Stanford University. Currently, Udacity is working on its second course on building a search engine. Udacity was announced at the 2012 <a href="http://www.wikipedia.org/wiki/Digital_Life_Design">Digital Life Design</a> conference.</p><br/></body><br/></html>
'''
def fetch_link(page, start=0):
    '''
    Fetch the URL of the next hyperlink in a HTML page from a specified
    starting position.

    Searches ``page`` from index ``start`` for the literal text ``<a href=``
    and extracts the double-quoted value that follows it.

    Returns a pair ``(url, next_pos)`` where ``next_pos`` is the index of the
    URL's closing quote, suitable as ``start`` for the next call. Returns
    ``('', -1)`` when no further complete link is found.
    '''
    start_link = page.find('<a href=', start)
    # str.find() signals "not found" with -1. Compare by value: ``is`` tests
    # object identity, which is not guaranteed to hold for equal integers.
    if start_link == -1:
        return ('', -1)

    opening_quote = page.find('"', start_link)
    if opening_quote == -1:
        # An anchor tag with no quoted value after it: nothing to extract.
        return ('', -1)

    url_start = opening_quote + 1
    url_end = page.find('"', url_start)
    if url_end == -1:
        # Unterminated attribute value; slicing with -1 would silently drop
        # the last character of the page and return a bogus URL.
        return ('', -1)

    return (page[url_start:url_end], url_end)
def main():
    '''
    Example usage of fetch_link() using PAGE

    Prints each URL returned by fetch_link() for PAGE together with the
    position at which the search for the following link resumes.
    '''
    next_pos = 0
    while True:
        url, next_pos = fetch_link(PAGE, next_pos)
        # fetch_link() reports "no more links" with a position of -1.
        # Compare by value; ``is`` would test object identity instead.
        if next_pos == -1:
            break
        print('url = %s, next_pos = %d' % (url, next_pos))
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment