joedougherty/nihscrape.py

## nihscrape.py
import requests
from bs4 import BeautifulSoup

# Set the URL to download. Per its query string this is an NIH Guide
# search for the term "HIV", ordered by expiration date (ascending).
endpoint = 'http://grants.nih.gov/searchGuide/Search_Guide_Results.cfm?Activity_Code=&Expdate_On_After=&OrderOn=ExpirationDate&OrderDirection=ASC&NoticesToo=0&OpeningDate_On_After=&Parent_FOA=All&PrimaryIC=Any&RelDate_On_After=&Status=1&SearchTerms=HIV&PAsToo=1&RFAsToo=1'

# Pull down the HTML from that URL.
# The timeout keeps the script from hanging forever on an unresponsive
# server, and raise_for_status() stops us from parsing an HTTP error page
# as if it were the search results.
r = requests.get(endpoint, timeout=30)
r.raise_for_status()
page = r.content

# Make this parseable.
# Naming the parser explicitly avoids the bs4 warning about an unspecified
# parser and keeps the parse result independent of which optional parsers
# happen to be installed on this machine.
parsed_page = BeautifulSoup(page, 'html.parser')

# Now that you have this parsed_page object, you can call
# the methods from BeautifulSoup on it.
# Docs: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
#
# Taking a quick look at the markup, we can see the target table is
# 1.) a <table> element (precisely as expected) and 2.) has a class
# of "searchtable."
#
# All we need to do is use the 'find' method to extract the table
# with this class.
#
# Take a look at the docs to see how this works.
extracted_table = parsed_page.find("table", {"class": "searchtable"})

# find() hands back None when nothing matches; fail with a clear message
# instead of an AttributeError on the prettify() call below.
if extracted_table is None:
    raise RuntimeError(
        'No <table class="searchtable"> element found at ' + endpoint
    )

# Re-indent the markup and encode it as latin-1 so that special characters
# come out as something non-ugly.
# NOTE(review): per the bs4 docs, passing an encoding makes prettify()
# return an encoded byte string, which Python 3's print() would show as a
# b'...' repr -- confirm which interpreter this script targets.
extracted_table = extracted_table.prettify('latin-1')

print(extracted_table)
	import requests
	from bs4 import BeautifulSoup

	# Set the URL to download. Per its query string this is an NIH Guide
	# search for the term "HIV", ordered by expiration date (ascending).
	endpoint = 'http://grants.nih.gov/searchGuide/Search_Guide_Results.cfm?Activity_Code=&Expdate_On_After=&OrderOn=ExpirationDate&OrderDirection=ASC&NoticesToo=0&OpeningDate_On_After=&Parent_FOA=All&PrimaryIC=Any&RelDate_On_After=&Status=1&SearchTerms=HIV&PAsToo=1&RFAsToo=1'

	# Pull down the HTML from that URL.
	# The timeout keeps the script from hanging forever on an unresponsive
	# server, and raise_for_status() stops us from parsing an HTTP error page
	# as if it were the search results.
	r = requests.get(endpoint, timeout=30)
	r.raise_for_status()
	page = r.content

	# Make this parseable.
	# Naming the parser explicitly avoids the bs4 warning about an unspecified
	# parser and keeps the parse result independent of which optional parsers
	# happen to be installed on this machine.
	parsed_page = BeautifulSoup(page, 'html.parser')

	# Now that you have this parsed_page object, you can call
	# the methods from BeautifulSoup on it.
	# Docs: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
	#
	# Taking a quick look at the markup, we can see the target table is
	# 1.) a <table> element (precisely as expected) and 2.) has a class
	# of "searchtable."
	#
	# All we need to do is use the 'find' method to extract the table
	# with this class.
	#
	# Take a look at the docs to see how this works.
	extracted_table = parsed_page.find("table", {"class": "searchtable"})

	# find() hands back None when nothing matches; fail with a clear message
	# instead of an AttributeError on the prettify() call below.
	if extracted_table is None:
		raise RuntimeError(
			'No <table class="searchtable"> element found at ' + endpoint
		)

	# Re-indent the markup and encode it as latin-1 so that special characters
	# come out as something non-ugly.
	# NOTE(review): per the bs4 docs, passing an encoding makes prettify()
	# return an encoded byte string, which Python 3's print() would show as a
	# b'...' repr -- confirm which interpreter this script targets.
	extracted_table = extracted_table.prettify('latin-1')

	print(extracted_table)