Skip to content

Instantly share code, notes, and snippets.

@aniemerg
Created May 4, 2012 02:08
Show Gist options
  • Save aniemerg/2591343 to your computer and use it in GitHub Desktop.
Save aniemerg/2591343 to your computer and use it in GitHub Desktop.
Python script to scrape the total number of issued U.S. utility patents
import urllib2
import re
import datetime
import pprint
# Report the highest utility patent number found in the USPTO full-text search
# results for the most recent Tuesday issue date.  Because utility patents are
# numbered sequentially, that number is reported as the total issued so far.
#
# NOTE: this file targets Python 2 (it imports urllib2).  The code below avoids
# Python-2-only syntax so the pure helpers also run unchanged on Python 3.
# NOTE(review): the patft.uspto.gov endpoint may no longer be reachable --
# confirm before relying on this script.

# Base address of the advanced-search parser
SEARCH_ADDRESS = "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm"

# Comma-grouped numbers with at least two ",ddd" groups (>= 1,000,000).  The
# look-arounds stop a match from starting or ending in the middle of a longer
# number, so "10,000,001" is read whole rather than as "0,000,001".
PATENT_NUMBER_RE = re.compile(r'(?<![\d,])\d{1,3}(?:,\d{3}){2,}(?!\d)')


def last_tuesday(today=None):
    """Return the most recent Tuesday on or before *today*.

    *today* defaults to the current local date.  If *today* is itself a
    Tuesday it is returned unchanged.
    """
    if today is None:
        today = datetime.date.today()
    # weekday(): Monday == 0, Tuesday == 1.  The modulo makes Monday step back
    # six days instead of landing on tomorrow's (future) Tuesday.
    days_back = (today.weekday() - 1) % 7
    return today - datetime.timedelta(days=days_back)


def build_search_url(issue_date):
    """Return the search URL for patents with the given issue date (ISD).

    The query also carries the ``APT/1`` term used by the original script.
    """
    options = "&r=0&p=1&f=S&l=50&Query=+ISD/%d/%d/%d+AND+APT/1&d=PTXT" % (
        issue_date.month, issue_date.day, issue_date.year)
    return SEARCH_ADDRESS + options


def fetch_html(url):
    """Download *url* and return the response body."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers on Python 2.
        response.close()


def highest_patent_number(html):
    """Return the largest comma-grouped patent number in *html* as an int.

    Raises ValueError if *html* contains no such number.
    """
    numbers = [int(match.replace(',', ''))
               for match in PATENT_NUMBER_RE.findall(html)]
    if not numbers:
        raise ValueError("No patent numbers found in the search results")
    # Compare numerically: sorting the digit strings would misorder numbers
    # of different lengths.
    return max(numbers)


def main():
    """Query the USPTO search and print the highest patent number found."""
    html = fetch_html(build_search_url(last_tuesday()))
    try:
        highest = highest_patent_number(html)
    except ValueError as err:
        # Exit with a readable message instead of an IndexError traceback.
        raise SystemExit(str(err))
    # Print out result
    print("The Total Number of U.S. Patents issued is: %s" % highest)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment