"""
A little Python script that automatically downloads images associated with a patent listing.

Jeff Thompson | 2017

A little Python script that automatically downloads
images associated with a patent listing. Use Google's
Patent Search first, then the "Download (CSV)" button
to save a file with all the results.
"""
import csv, urllib, urllib2, re, os
csv_filename = 'search.csv'    # file to load from
image_folder = 'images'        # folder to save images to (will be created)

# use a "user agent" otherwise we'll get blocked by Google :)
USER_AGENT = ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) '
              'AppleWebKit/534.3 (KHTML, like Gecko) '
              'Chrome/6.0.472.63 Safari/534.3')

# every full-size image is referenced in the page source by a tag of the
# form  <meta itemprop="full" content="IMAGE_URL">
IMAGE_PATTERN = re.compile(r'<meta itemprop="full" content="(.*?)"', re.M)

# positions of the fields we keep from each row of the search results
ID_COLUMN, DATE_COLUMN, URL_COLUMN = 0, 7, 8


def read_listings(lines):
    """Parse the search-results csv into a list of ``[id, date, url]``.

    ``lines`` is any iterable of text lines; an open file works. The first
    two lines are discarded (search details, then the csv header). Rows
    that are too short to contain the url column -- blank lines, for
    instance -- are skipped instead of raising IndexError.
    """
    lines = iter(lines)
    next(lines, None)    # skip first line (with search details)
    next(lines, None)    # skip second line (csv header)

    listings = []
    for row in csv.reader(lines, quotechar='"'):
        if len(row) <= URL_COLUMN:
            continue
        listings.append([row[ID_COLUMN], row[DATE_COLUMN], row[URL_COLUMN]])
    return listings


def find_image_urls(html):
    """Return every full-size image url found in a page's source."""
    return IMAGE_PATTERN.findall(html)


def image_filename(patent_id, index):
    """Return the name an image is saved under, e.g. ``ID-003.png``."""
    return patent_id + '-' + str(index).zfill(3) + '.png'


def fetch_page(url):
    """Download and return the source of the page at ``url``.

    The request carries USER_AGENT; any error raised by urllib2 is left
    to propagate to the caller.
    """
    request = urllib2.Request(url, None, {'User-Agent': USER_AGENT})
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # release the connection even if reading fails
        response.close()


def download_images(patent_id, image_urls, folder):
    """Save each url in ``image_urls`` into ``folder`` as a numbered png."""
    for index, image_url in enumerate(image_urls):
        target = os.path.join(folder, image_filename(patent_id, index))
        urllib.urlretrieve(image_url, target)


def write_listings(f, listings):
    """Write one csv row per listing to the open file ``f``.

    Each row is id, date, url, then a single field holding the listing's
    image urls joined with commas. A listing without an image list gets an
    empty last field. csv.writer takes care of quoting, so fields that
    contain commas or quotes stay readable by a csv parser.
    """
    writer = csv.writer(f, lineterminator='\n')
    for listing in listings:
        images = listing[3] if len(listing) > 3 else []
        writer.writerow(list(listing[:3]) + [','.join(images)])


def main():
    """Read the search results, download all images, save listings.csv."""
    print('extracting urls...')
    with open(csv_filename) as f:
        listings = read_listings(f)
    print('- found ' + str(len(listings)))

    print('getting images...')
    if not os.path.exists(image_folder):
        os.makedirs(image_folder)
    for listing in listings:
        patent_id, _date, url = listing[:3]
        print('- ' + url)

        print('  - downloading page source...')
        images = find_image_urls(fetch_page(url))
        # keep the image urls with the listing so they reach listings.csv
        listing.append(images)

        print('  - downloading ' + str(len(images)) + ' images...')
        download_images(patent_id, images, image_folder)

    print('saving listing data to file...')
    with open('listings.csv', 'w') as f:
        write_listings(f, listings)
    print('- all done')


if __name__ == '__main__':
    main()
