Skip to content

Instantly share code, notes, and snippets.

@nharrell04
Created July 29, 2013 03:42
Show Gist options
  • Save nharrell04/6102026 to your computer and use it in GitHub Desktop.
Save nharrell04/6102026 to your computer and use it in GitHub Desktop.
from urllib2 import urlopen
def gethtml(url):
    """Fetch *url* and return the raw body of the response.

    The response object is closed as soon as the body has been read, so the
    underlying handle is released even if ``read()`` raises.
    """
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # try/finally rather than ``with``: Python 2's urllib2 response
        # objects do not implement the context-manager protocol.
        response.close()
def extract(url):
    """Return the position of the first mention of "guides" in the page.

    The page at *url* is downloaded with ``gethtml`` and searched with
    ``find``, so the result is the offset of the first match, or -1 when
    the text does not occur at all.
    """
    return gethtml(url).find('guides')
def lookfor(url):
    """Return how many times "guides" occurs in the page at *url*.

    The previous implementation looped with ``for guides in z``, which binds
    each single character of the page to the name ``guides`` and therefore
    counted every character in the document (i.e. its length) instead of the
    occurrences of the word. ``count`` performs the intended substring tally.
    """
    html = gethtml(url)
    return html.count('guides')
def parse_guides(url):
    """Split the guide listing of the page at *url* on the guides path.

    The page is cut at the first occurrence of
    "/library/research/guides/abbre" and everything from there on is split
    on "/library/research/guides/". Because the slice begins with the
    separator itself, the first element of the returned list is the empty
    string; the remaining elements are the raw HTML fragments that follow
    each occurrence of the path (they are not cleaned up further).

    The offset of the starting marker is printed as a debugging aid.

    Returns an empty list when the starting marker is not present. Without
    that guard ``find`` would return -1 and the slice ``q[-1:]`` would
    silently produce a list holding only the last character of the page.
    """
    start_marker = "/library/research/guides/abbre"
    separator = "/library/research/guides/"

    html = gethtml(url)
    start_guide = html.find(start_marker)
    # Parenthesised so the line is valid both as a Python 2 print statement
    # and as a Python 3 function call; the output is the same.
    print(start_guide)
    if start_guide == -1:
        return []
    return html[start_guide:].split(separator)
# Index page of the Georgetown Law Library research guides.
GUIDES_INDEX_URL = "http://www.law.georgetown.edu/library/research/guides/index.cfm"

# To dump the raw HTML instead, print gethtml(GUIDES_INDEX_URL).
# Run each report in turn and print its result: first offset of "guides",
# the lookfor tally, then the list produced by parse_guides.
for report in (extract, lookfor, parse_guides):
    print(report(GUIDES_INDEX_URL))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment