nharrell04/georgetown

## georgetown
from urllib2 import urlopen

def gethtml(url):
    """Fetch ``url`` and return the raw body of the response.

    The response object is closed explicitly once the body has been read,
    so the connection is released even when ``read()`` raises.  A
    ``try``/``finally`` is used rather than ``with`` because the object
    returned by ``urllib2.urlopen`` is not a context manager.

    :param url: address handed unchanged to ``urlopen``.
    :returns: whatever ``read()`` yields for the opened response.
    :raises: any error from ``urlopen`` or ``read()`` propagates unchanged.
    """
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()

def extract(url):
    """Return the position of the first "guides" in the page at ``url``.

    The page is downloaded with ``gethtml`` and searched with ``find``, so
    the result is ``-1`` when "guides" does not occur at all.
    """
    page = gethtml(url)
    first_hit = page.find('guides')
    return first_hit

def lookfor(url):
    """Count how many times "guides" appears in the page at ``url``.

    Iterating over the downloaded page directly (``for guides in z``) binds
    the loop variable to one character at a time, so a counter incremented
    in that loop ends up equal to the length of the page -- far larger than
    the number of matches.  ``count`` tallies non-overlapping occurrences of
    the literal substring instead, which is what a text search for "guides"
    reports.

    :param url: address of the page to download via ``gethtml``.
    :returns: number of occurrences of "guides" as an ``int``.
    """
    page = gethtml(url)
    return page.count('guides')

def parse_guides(url):
    """Split the page at ``url`` into pieces at each guide link path.

    The page is trimmed so that it starts at the first occurrence of
    ``/library/research/guides/abbre`` and is then split on
    ``/library/research/guides/``.  Because the trimmed text begins with the
    separator itself, the first element of the result is an empty string.

    The offset of the starting marker is printed before returning, as a
    debugging aid.

    :param url: address of the page to download via ``gethtml``.
    :returns: list of the text pieces between separators, or ``[]`` when
        the starting marker is not present in the page.
    """
    guide_path = "/library/research/guides/"
    page = gethtml(url)
    start_guide = page.find(guide_path + "abbre")
    print(start_guide)
    if start_guide == -1:
        # find() reports "not found" as -1; slicing with page[-1:] would
        # silently keep only the last character and return a junk list.
        return []
    return page[start_guide:].split(guide_path)

# Guides index page that each of the probes below is run against.
_GUIDES_INDEX_URL = "http://www.law.georgetown.edu/library/research/guides/index.cfm"

# Debugging aid -- uncomment to dump the raw page:
# print(gethtml(_GUIDES_INDEX_URL))
print(extract(_GUIDES_INDEX_URL))
print(lookfor(_GUIDES_INDEX_URL))
print(parse_guides(_GUIDES_INDEX_URL))
	from urllib2 import urlopen

	def gethtml(url):
		"""Fetch ``url`` and return the raw body of the response.

		The body is indented under the ``def`` so the function parses, and
		the response is closed in a ``finally`` clause so the connection is
		released even when ``read()`` raises.

		:param url: address handed unchanged to ``urlopen``.
		:returns: whatever ``read()`` yields for the opened response.
		:raises: any error from ``urlopen`` or ``read()`` propagates unchanged.
		"""
		response = urlopen(url)
		try:
			return response.read()
		finally:
			response.close()

	def extract(url):
		"""Return the position of the first "guides" in the page at ``url``.

		The page is downloaded with ``gethtml`` and searched with ``find``,
		so the result is ``-1`` when "guides" does not occur at all.
		"""
		html = gethtml(url)
		return html.find('guides')

	def lookfor(url):
		"""Count how many times "guides" appears in the page at ``url``.

		Looping over the downloaded page directly visits one character at a
		time, so counting loop iterations gives the length of the page, not
		the number of matches.  ``count`` tallies non-overlapping occurrences
		of the literal substring instead.

		:param url: address of the page to download via ``gethtml``.
		:returns: number of occurrences of "guides" as an ``int``.
		"""
		page = gethtml(url)
		return page.count('guides')

	def parse_guides(url):
		"""Split the page at ``url`` into pieces at each guide link path.

		The page is trimmed to start at the first occurrence of
		``/library/research/guides/abbre`` and then split on
		``/library/research/guides/``.  The trimmed text begins with the
		separator, so the first element of the result is an empty string.
		The offset of the starting marker is printed as a debugging aid.

		:param url: address of the page to download via ``gethtml``.
		:returns: list of the text pieces between separators, or ``[]``
			when the starting marker is not present in the page.
		"""
		guide_path = "/library/research/guides/"
		page = gethtml(url)
		start_guide = page.find(guide_path + "abbre")
		print(start_guide)
		if start_guide == -1:
			# find() reports "not found" as -1; slicing with page[-1:]
			# would keep only the last character and return a junk list.
			return []
		return page[start_guide:].split(guide_path)

	# Guides index page that each of the probes below is run against.
	_GUIDES_INDEX_URL = "http://www.law.georgetown.edu/library/research/guides/index.cfm"

	# Debugging aid -- uncomment to dump the raw page:
	# print(gethtml(_GUIDES_INDEX_URL))
	print(extract(_GUIDES_INDEX_URL))
	print(lookfor(_GUIDES_INDEX_URL))
	print(parse_guides(_GUIDES_INDEX_URL))