WilHall/WebpageThumbnailsInPython.md

## thumb.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

from cgi import parse_qs, escape
from selenium import webdriver
from uuid import uuid4
import cStringIO
import Image
import json

# Filesystem directory (with trailing slash) that application() writes into:
# ghostdriver.log goes directly here, thumbnails go into "thumbs/" below it.
basePath = "/var/www/example.com/path/to/thumb/"
# URL prefix (with trailing slash) that application() prepends to a thumbnail's
# file name when building the "thumb" value of the JSON response.
baseUrl = "http://example.com/path/to/thumbs/"
# Browser window size as (width, height); application() passes it to
# set_window_size() before loading the page.
screenSize = (1024, 768)

def application(environ, start_response):
    """WSGI entry point: screenshot a web page and save a thumbnail of it.

    The query string must supply ``url`` (the page to capture) plus ``width``
    and ``height`` (positive integers forming the box handed to
    ``Image.thumbnail``). The page is rendered with PhantomJS at
    ``screenSize`` and the result is written as a uniquely named PNG under
    ``basePath + "thumbs/"``.

    Args:
        environ: WSGI environment; only ``QUERY_STRING`` is read.
        start_response: WSGI callable, invoked once with the status line and
            the response headers.

    Returns:
        A one-element list holding the JSON body: ``{"success": false}`` when
        a parameter is missing or is not a positive integer, otherwise
        ``{"success": true, "thumb": <baseUrl + file name>}``. The HTTP
        status is ``200 OK`` in both cases.

    Raises:
        Errors raised by selenium, PIL or the filesystem while capturing or
        saving are not caught here and propagate to the WSGI server.
    """
    # QUERY_STRING may be absent from the environment; treat that as empty.
    query = parse_qs(environ.get('QUERY_STRING', ''))
    output = json.dumps({'success': False})

    try:
        url = query['url'][0]
        width = int(query['width'][0])
        height = int(query['height'][0])
    except (KeyError, ValueError):
        # Missing parameter or non-numeric size: answer with the failure
        # payload instead of letting the exception escape.
        request_ok = False
    else:
        request_ok = width > 0 and height > 0

    if request_ok:
        # NOTE(review): `url` comes straight from the request and is loaded
        # as-is; consider restricting schemes/hosts if this endpoint is
        # reachable by untrusted clients.
        driver = webdriver.PhantomJS(service_log_path=basePath + "ghostdriver.log")
        try:
            driver.set_script_timeout(30)
            driver.set_window_size(screenSize[0], screenSize[1])
            driver.get(url)
            png_bytes = driver.get_screenshot_as_png()
        finally:
            # Always shut the browser down, otherwise every request leaves a
            # PhantomJS process behind.
            driver.quit()

        screenshot_fname = str(uuid4()) + ".png"
        screenshot_fpath = "{0}thumbs/{1}".format(basePath, screenshot_fname)
        screenshot = cStringIO.StringIO(png_bytes)
        try:
            image = Image.open(screenshot)
            image.thumbnail((width, height), Image.ANTIALIAS)
            # PIL opens the destination itself; no separate file handle needed.
            image.save(screenshot_fpath, 'PNG')
        finally:
            screenshot.close()
        output = json.dumps({'success': True, 'thumb': baseUrl + screenshot_fname})

    status = '200 OK'
    response_headers = [('Content-type', 'application/json'),
                        ('Content-Length', str(len(output)))]
    start_response(status, response_headers)
    return [output]

## WebpageThumbnailsInPython.md

      
    Raw
  

              WebpageThumbnailsInPython.md
            
          
# Webpage Thumbnails In Python
A small WSGI script that uses selenium's headless PhantomJS driver to capture webpage screenshots, and PIL to resize them. Cropping options could easily be added.
## Dependencies

- Python selenium
- Python PIL
- PhantomJS
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	from cgi import parse_qs, escape
	from selenium import webdriver
	from uuid import uuid4
	import cStringIO
	import Image
	import json

	# Filesystem directory (with trailing slash) that application() writes into:
	# ghostdriver.log goes directly here, thumbnails go into "thumbs/" below it.
	basePath = "/var/www/example.com/path/to/thumb/"
	# URL prefix (with trailing slash) that application() prepends to a thumbnail's
	# file name when building the "thumb" value of the JSON response.
	baseUrl = "http://example.com/path/to/thumbs/"
	# Browser window size as (width, height); application() passes it to
	# set_window_size() before loading the page.
	screenSize = (1024, 768)

	def application(environ, start_response):
	    """WSGI entry point: screenshot a web page and save a thumbnail of it.

	    The query string must supply ``url`` (the page to capture) plus ``width``
	    and ``height`` (positive integers forming the box handed to
	    ``Image.thumbnail``). The page is rendered with PhantomJS at
	    ``screenSize`` and the result is written as a uniquely named PNG under
	    ``basePath + "thumbs/"``.

	    Args:
	        environ: WSGI environment; only ``QUERY_STRING`` is read.
	        start_response: WSGI callable, invoked once with the status line and
	            the response headers.

	    Returns:
	        A one-element list holding the JSON body: ``{"success": false}`` when
	        a parameter is missing or is not a positive integer, otherwise
	        ``{"success": true, "thumb": <baseUrl + file name>}``. The HTTP
	        status is ``200 OK`` in both cases.

	    Raises:
	        Errors raised by selenium, PIL or the filesystem while capturing or
	        saving are not caught here and propagate to the WSGI server.
	    """
	    # QUERY_STRING may be absent from the environment; treat that as empty.
	    query = parse_qs(environ.get('QUERY_STRING', ''))
	    output = json.dumps({'success': False})

	    try:
	        url = query['url'][0]
	        width = int(query['width'][0])
	        height = int(query['height'][0])
	    except (KeyError, ValueError):
	        # Missing parameter or non-numeric size: answer with the failure
	        # payload instead of letting the exception escape.
	        request_ok = False
	    else:
	        request_ok = width > 0 and height > 0

	    if request_ok:
	        # NOTE(review): `url` comes straight from the request and is loaded
	        # as-is; consider restricting schemes/hosts if this endpoint is
	        # reachable by untrusted clients.
	        driver = webdriver.PhantomJS(service_log_path=basePath + "ghostdriver.log")
	        try:
	            driver.set_script_timeout(30)
	            driver.set_window_size(screenSize[0], screenSize[1])
	            driver.get(url)
	            png_bytes = driver.get_screenshot_as_png()
	        finally:
	            # Always shut the browser down, otherwise every request leaves a
	            # PhantomJS process behind.
	            driver.quit()

	        screenshot_fname = str(uuid4()) + ".png"
	        screenshot_fpath = "{0}thumbs/{1}".format(basePath, screenshot_fname)
	        screenshot = cStringIO.StringIO(png_bytes)
	        try:
	            image = Image.open(screenshot)
	            image.thumbnail((width, height), Image.ANTIALIAS)
	            # PIL opens the destination itself; no separate file handle needed.
	            image.save(screenshot_fpath, 'PNG')
	        finally:
	            screenshot.close()
	        output = json.dumps({'success': True, 'thumb': baseUrl + screenshot_fname})

	    status = '200 OK'
	    response_headers = [('Content-type', 'application/json'),
	                        ('Content-Length', str(len(output)))]
	    start_response(status, response_headers)
	    return [output]