Skip to content

Instantly share code, notes, and snippets.

@aniemerg
Created May 24, 2012 14:36
Show Gist options
  • Save aniemerg/2781935 to your computer and use it in GitHub Desktop.
Save aniemerg/2781935 to your computer and use it in GitHub Desktop.
Script to retrieve the number of U.S. utility patents granted by week during 2009-2010
import urllib2
import re
import datetime
import pprint
import time
# Collect, for each weekly Tuesday date going backwards from the end of 2010,
# the patent count reported by the USPTO full-text search, then save the
# (date, count) pairs to a file.

# Base address of the USPTO advanced-search CGI endpoint.
SEARCH_ADDRESS = "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm"
# Query-string template; the three placeholders are month, day and year of the
# ISD term (presumably the issue date; APT/1 presumably selects utility
# patents -- confirm against the USPTO search syntax).
QUERY_OPTIONS = "&r=0&p=1&f=S&l=50&Query=+ISD/%s/%s/%s+AND+APT/1&d=PTXT"
# The result page reports the hit count as ": <digits> patents."
COUNT_PATTERN = re.compile(r': (\d+) patents\.')
# Number of consecutive Tuesdays to query. Starting from 2010-12-28 this
# reaches back to 2008-12-30, i.e. one week before the first Tuesday of 2009.
WEEK_COUNT = 105
# Pause between requests so the patent office is not flooded.
REQUEST_DELAY_SECONDS = 4
OUTPUT_PATH = 'Two_years_of_Grants_2009_2010.csv'


def last_tuesday(day):
    """Return the latest Tuesday that falls on or before `day`.

    A Monday maps to the Tuesday of the previous week, so the result never
    lies after `day`.
    """
    # weekday(): Monday == 0, Tuesday == 1; the modulo wraps Monday back 6 days.
    return day - datetime.timedelta(days=(day.weekday() - 1) % 7)


def issue_dates(end, count):
    """Return `count` Tuesdays, newest first, ending at the last one <= `end`."""
    newest = last_tuesday(end)
    return [newest - datetime.timedelta(weeks=n) for n in range(count)]


def build_search_url(day):
    """Return the search URL whose query is restricted to the date `day`."""
    return SEARCH_ADDRESS + QUERY_OPTIONS % (day.month, day.day, day.year)


def parse_patent_count(html):
    """Return the hit count found in `html` as a digit string, or None.

    None is returned when the page does not contain the expected
    ": <digits> patents." text, instead of failing with an IndexError.
    """
    match = COUNT_PATTERN.search(html)
    if match is None:
        return None
    return match.group(1)


def fetch_page(url):
    """Download `url` and return the response body, closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def format_row(thedate, count):
    """Return one output line in the form "['m/d/yyyy', count],"."""
    return "[\'%s\', %s],\n" % (thedate, count)


def main():
    """Query every week, print each count, and write all rows to OUTPUT_PATH."""
    results = []
    # Loop over weeks, newest first.
    for current in issue_dates(datetime.date(2010, 12, 31), WEEK_COUNT):
        month, day, year = current.month, current.day, current.year
        thedate = "%s/%s/%s" % (month, day, year)
        count = parse_patent_count(fetch_page(build_search_url(current)))
        if count is None:
            # Keep going: one unparsable page should not discard the
            # results already gathered for the other weeks.
            print("No patent count found for the week of %s; skipping" % thedate)
        else:
            results.append((thedate, count))
            print("The Total Number of U.S. Patents issued the week of %s/%s/%s is: %s" %
                  (month, day, year, count))
        # Let's not flood the patent office
        time.sleep(REQUEST_DELAY_SECONDS)
    # Save to file; the context manager closes it even if a write fails.
    with open(OUTPUT_PATH, 'w') as outfile:
        for thedate, count in results:
            outfile.write(format_row(thedate, count))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment