Created
May 22, 2012 18:13
-
-
Save anderser/2770668 to your computer and use it in GitHub Desktop.
Grab webpage from python/django with wkhtmltopdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import shlex
import subprocess
import logging

from django.conf import settings

# Module-level logger; the name is fixed (rather than __name__) so existing
# logging configuration keyed on 'pagetracker.grabber' keeps applying.
logger = logging.getLogger('pagetracker.grabber')
def grab_page(url, outfile):
    """
    Grab a webpage to PDF using wkhtmltopdf.

    Arguments:
        url: address of the page to render; also used as the PDF title.
        outfile: output file name, joined onto
            settings.WKHTMLTOPDF_OUTPUT_DIR.

    Returns True when wkhtmltopdf exits successfully. If running the
    command fails (non-zero exit status, executable cannot be started, ...)
    the exception is logged and False is returned instead of propagating.

    Requires two Django settings:
        WKHTMLTOPDF_LOCATION: when equal to "Mac", the binary at
            /usr/local/bin/wkhtmltopdf is run directly; any other value
            runs /usr/bin/wkhtmltopdf wrapped in xvfb-run.
        WKHTMLTOPDF_OUTPUT_DIR: directory where the PDFs end up.
    """
    # More info here:
    # http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html
    logger.info('Starting grab of %s to %s', url, outfile)

    options = [
        "--quiet",
        # Optional: "--javascript-delay", "400",
        "--title", "%s" % url,
        "--lowquality",
        # Optional: "--disable-plugins",
        url,
        os.path.join(settings.WKHTMLTOPDF_OUTPUT_DIR, outfile),
    ]

    if settings.WKHTMLTOPDF_LOCATION == "Mac":
        wkhtmltopdf = ["/usr/local/bin/wkhtmltopdf"]
    else:
        # Everywhere else the binary is launched through xvfb-run, which
        # supplies a virtual X display for it.
        wkhtmltopdf = shlex.split(
            '/usr/bin/xvfb-run -a -s "-screen 0 1024x768x16" '
            '/usr/bin/wkhtmltopdf'
        )
    wkhtmltopdf.extend(options)

    try:
        # The command is passed as an argument list (no shell), so the URL
        # is never interpreted by a shell.
        subprocess.check_call(wkhtmltopdf)
    except Exception:
        # logger.exception records the message plus the active exception's
        # traceback; the previous logger.error("...", e) call had no
        # placeholder for `e`, so the detail was lost to a formatting error.
        logger.exception("Could not grab PDF")
        return False

    logger.info('Ending grab of %s to %s successful', url, outfile)
    return True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment