Skip to content

Instantly share code, notes, and snippets.

@WilHall
Created November 29, 2013 00:45
Show Gist options
  • Save WilHall/7700050 to your computer and use it in GitHub Desktop.
Save WilHall/7700050 to your computer and use it in GitHub Desktop.
Webpage Thumbnails In Python
#!/usr/bin/python
# -*- coding: utf-8 -*-
from cgi import parse_qs, escape
from selenium import webdriver
from uuid import uuid4
import cStringIO
import Image
import json
# Filesystem directory the script writes into: the PhantomJS service log
# ("ghostdriver.log") is placed directly here and thumbnails are saved in its
# "thumbs/" subdirectory. Paths are built by string concatenation, so the
# trailing slash is required.
basePath = "/var/www/example.com/path/to/thumb/"
# URL prefix prepended to the generated file name in the JSON response.
# NOTE(review): thumbnails are written to basePath + "thumbs/" (i.e.
# ".../thumb/thumbs/") while this URL ends in ".../thumbs/" -- confirm the web
# server actually maps this URL to that directory.
baseUrl = "http://example.com/path/to/thumbs/"
# (width, height) passed to the browser window before the page is loaded.
screenSize = (1024, 768)
def application(environ, start_response):
    """WSGI entry point: render a web page and store a thumbnail of it.

    Expects the query-string parameters ``url``, ``width`` and ``height``.
    The page is loaded in a headless PhantomJS browser sized to
    ``screenSize``, the screenshot is shrunk with PIL to fit within
    ``width`` x ``height`` and written as a PNG under ``basePath + "thumbs/"``
    with a random UUID file name.

    The response is always ``200 OK`` with a JSON body:
    ``{"success": true, "thumb": <baseUrl + file name>}`` on success, or
    ``{"success": false}`` when a parameter is missing or invalid (non-integer
    or non-positive dimensions, or a URL that is not http/https).

    Errors raised while driving the browser or processing the image are not
    caught here and propagate to the WSGI server.

    Returns a single-element list containing the JSON body.
    """
    def respond(payload):
        # Single place that serialises the payload and emits the headers so
        # every exit path produces a consistent response.
        body = json.dumps(payload)
        start_response('200 OK', [('Content-type', 'application/json'),
                                  ('Content-Length', str(len(body)))])
        return [body]

    failure = {'success': False}

    # QUERY_STRING may legitimately be absent from the WSGI environ.
    query = parse_qs(environ.get('QUERY_STRING', ''))
    try:
        url = query['url'][0]
        width = int(query['width'][0])
        height = int(query['height'][0])
    except (KeyError, ValueError):
        # Missing parameter or non-numeric dimension.
        return respond(failure)

    if width <= 0 or height <= 0:
        return respond(failure)

    # The URL comes from an untrusted client; only allow web schemes so the
    # browser cannot be pointed at local files (file://) or other handlers.
    if not url.lower().startswith(('http://', 'https://')):
        return respond(failure)

    driver = webdriver.PhantomJS(service_log_path=basePath + "ghostdriver.log")
    try:
        driver.set_script_timeout(30)
        driver.set_window_size(screenSize[0], screenSize[1])
        driver.get(url)
        png_data = driver.get_screenshot_as_png()
    finally:
        # Always shut the browser down; otherwise each request leaves a
        # PhantomJS process running.
        driver.quit()

    screenshot_fname = str(uuid4()) + ".png"
    screenshot_fpath = "{0}thumbs/{1}".format(basePath, screenshot_fname)

    screenshot = cStringIO.StringIO(png_data)
    try:
        image = Image.open(screenshot)
        # thumbnail() resizes in place and preserves the aspect ratio.
        image.thumbnail((width, height), Image.ANTIALIAS)
        # PIL opens the destination itself; no separate file handle needed.
        image.save(screenshot_fpath, 'PNG')
    finally:
        screenshot.close()

    return respond({'success': True, 'thumb': baseUrl + screenshot_fname})

# Webpage Thumbnails In Python

A small WSGI script that uses Selenium's headless PhantomJS driver to capture webpage screenshots and PIL to resize them. Cropping options could easily be added.

## Dependencies

  • Python selenium
  • Python PIL
  • PhantomJS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment