# meikj/ls1hw1.py

## ls1hw1.py
#!/usr/bin/env python
'''
Udacity CS101
Lesson 1, Homework 1
'''

# Sample HTML document used as demo input by main(). It contains three
# '<a href="...">' hyperlinks. It also contains double quotes that are not
# part of any link (the xmlns attribute before the first link and a quoted
# phrase in the body text).
PAGE = '''
<html xmlns="http://www.w3.org/1999/xhtml"><br/> <head><br/><title>Udacity</title> <br/></head><br/><br/><body> <br/><h1>Udacity</h1><br/><br/> <p><b>Udacity</b> is a private institution of <a href="http://www.wikipedia.org/wiki/Higher_education"> higher education founded by</a> <a href="http://www.wikipedia.org/wiki/Sebastian_Thrun">Sebastian Thrun</a>, David Stavens, and Mike Sokolsky with the goal to provide university-level education that is "both high quality and low cost".<br/>It is the outgrowth of a free computer science class offered in 2011 through Stanford University. Currently, Udacity is working on its second course on building a search engine. Udacity was announced at the 2012 <a href="http://www.wikipedia.org/wiki/Digital_Life_Design">Digital Life Design</a> conference.</p><br/></body><br/></html>
'''

def fetch_link(page, start=0):
    '''
    Fetch the URL of the first hyperlink in an HTML page at or after a
    starting position.

    Args:
        page: HTML text to scan.
        start: Index in page at which to begin looking for '<a href='.

    Returns:
        A (url, next_pos) pair. url is the text between the first pair of
        double quotes that follows the '<a href=' marker, and next_pos is
        the index of the closing quote, suitable for passing back in as
        start to find the following link. Returns ('', -1) when no further
        link with a complete double-quoted URL exists.
    '''
    start_link = page.find('<a href=', start)
    if start_link == -1:
        return ('', -1)

    # str.find() reports "not found" as -1. Check it before doing arithmetic
    # on the result; otherwise the search silently restarts from the top of
    # the page and can return a position before start.
    open_quote = page.find('"', start_link)
    if open_quote == -1:
        return ('', -1)

    url_start = open_quote + 1
    url_end = page.find('"', url_start)
    if url_end == -1:
        # Unterminated URL: there is nothing reliable to return.
        return ('', -1)

    return (page[url_start:url_end], url_end)

def main():
    '''
    Example usage of fetch_link() using PAGE.

    Prints every hyperlink URL found in PAGE, in order of appearance,
    together with the position at which the next search resumes.
    '''
    next_pos = 0
    while True:
        url, next_pos = fetch_link(PAGE, next_pos)

        # fetch_link() signals "no more links" with a position of -1.
        # Compare by value: 'is not' tests object identity, which only
        # appears to work for small ints as a CPython caching detail.
        if next_pos == -1:
            break
        print('url = %s, next_pos = %d' % (url, next_pos))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	'''
	Udacity CS101
	Lesson 1, Homework 1
	'''

	# NOTE(review): pasted duplicate of the PAGE sample defined earlier in
	# this file. Unlike that copy, the literal here carries a tab before
	# '<html' and before the closing quotes -- confirm whether it is needed.
	PAGE = '''
	<html xmlns="http://www.w3.org/1999/xhtml"><br/> <head><br/><title>Udacity</title> <br/></head><br/><br/><body> <br/><h1>Udacity</h1><br/><br/> <p><b>Udacity</b> is a private institution of <a href="http://www.wikipedia.org/wiki/Higher_education"> higher education founded by</a> <a href="http://www.wikipedia.org/wiki/Sebastian_Thrun">Sebastian Thrun</a>, David Stavens, and Mike Sokolsky with the goal to provide university-level education that is "both high quality and low cost".<br/>It is the outgrowth of a free computer science class offered in 2011 through Stanford University. Currently, Udacity is working on its second course on building a search engine. Udacity was announced at the 2012 <a href="http://www.wikipedia.org/wiki/Digital_Life_Design">Digital Life Design</a> conference.</p><br/></body><br/></html>
	'''

	def fetch_link(page, start=0):
	    '''
	    Fetch the URL of the first hyperlink in an HTML page at or after a
	    starting position.

	    Args:
	        page: HTML text to scan.
	        start: Index in page at which to begin looking for '<a href='.

	    Returns:
	        A (url, next_pos) pair. url is the text between the first pair of
	        double quotes that follows the '<a href=' marker, and next_pos is
	        the index of the closing quote, suitable for passing back in as
	        start to find the following link. Returns ('', -1) when no further
	        link with a complete double-quoted URL exists.
	    '''
	    start_link = page.find('<a href=', start)
	    if start_link == -1:
	        return ('', -1)

	    # str.find() reports "not found" as -1. Check it before doing
	    # arithmetic on the result; otherwise the search silently restarts
	    # from the top of the page and can return a position before start.
	    open_quote = page.find('"', start_link)
	    if open_quote == -1:
	        return ('', -1)

	    url_start = open_quote + 1
	    url_end = page.find('"', url_start)
	    if url_end == -1:
	        # Unterminated URL: there is nothing reliable to return.
	        return ('', -1)

	    return (page[url_start:url_end], url_end)

	def main():
	    '''
	    Example usage of fetch_link() using PAGE.

	    Prints every hyperlink URL found in PAGE, in order of appearance,
	    together with the position at which the next search resumes.
	    '''
	    next_pos = 0
	    while True:
	        url, next_pos = fetch_link(PAGE, next_pos)

	        # fetch_link() signals "no more links" with a position of -1.
	        # Compare by value: 'is not' tests object identity, which only
	        # appears to work for small ints as a CPython caching detail.
	        if next_pos == -1:
	            break
	        print('url = %s, next_pos = %d' % (url, next_pos))

	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()