Skip to content

Instantly share code, notes, and snippets.

@anderser
Created May 22, 2012 18:13
Show Gist options
  • Save anderser/2770668 to your computer and use it in GitHub Desktop.
Save anderser/2770668 to your computer and use it in GitHub Desktop.
Grab webpage from python/django with wkhtmltopdf
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import shlex, subprocess
import logging
from django.conf import settings
# Module-level logger used by grab_page, registered under the
# 'pagetracker.grabber' logger name.
logger = logging.getLogger('pagetracker.grabber')
def grab_page(url, outfile):
    """
    Grab a webpage to PDF using wkhtmltopdf.

    Arguments:
        url: address of the page to render; it is also used as the PDF title.
        outfile: output file name, joined onto settings.WKHTMLTOPDF_OUTPUT_DIR.

    Returns True on success and False on error. Any exception raised while
    running wkhtmltopdf is logged and swallowed, never re-raised.

    Requires two entries in the Django settings file:
        WKHTMLTOPDF_LOCATION: when equal to "Mac", /usr/local/bin/wkhtmltopdf
            is run directly; for any other value /usr/bin/wkhtmltopdf is run
            under xvfb-run.
        WKHTMLTOPDF_OUTPUT_DIR: directory where the PDFs end up.
    """
    # More info here: http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html
    logger.info('Starting grab of %s to %s', url, outfile)

    # Optional flags that were tried and are currently disabled:
    #   "--javascript-delay", "400"
    #   "--disable-plugins"
    options = [
        "--quiet",
        "--title", "%s" % url,
        "--lowquality",
        url,
        os.path.join(settings.WKHTMLTOPDF_OUTPUT_DIR, outfile),
    ]

    if settings.WKHTMLTOPDF_LOCATION == "Mac":
        command = ["/usr/local/bin/wkhtmltopdf"]
    else:
        # Wrap the binary in xvfb-run so it gets a virtual X display.
        command = shlex.split(
            '/usr/bin/xvfb-run -a -s "-screen 0 1024x768x16" /usr/bin/wkhtmltopdf'
        )
    command.extend(options)

    try:
        # The command is an argument list (no shell), so the URL is passed
        # through verbatim without shell interpretation.
        subprocess.check_call(command)
    except Exception as e:
        # The message needs a placeholder for the exception; without one the
        # logging module fails to format the record and the error is lost.
        logger.error("Could not grab PDF: %s", e)
        return False
    else:
        logger.info('Ending grab of %s to %s successful', url, outfile)
        return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment