Created
July 3, 2011 17:52
-
-
Save anderser/1062417 to your computer and use it in GitHub Desktop.
Simple Python script for use in Django to grab a webpage to PDF using wkhtmltopdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
import logging
import os
import subprocess

from django.conf import settings

# Module-level logger shared by the grab helpers in this file.
logger = logging.getLogger('pagetracker.grabber')
def grab_page(url, outfile):
    """
    Grab a webpage to PDF using wkhtmltopdf.

    Arguments:
        url: address of the page to render; it is also used as the PDF title.
        outfile: output file name, joined onto settings.WKHTMLTOPDF_OUTPUT_DIR.

    Returns True when wkhtmltopdf exits successfully. Returns False when it
    exits with a non-zero status; in that case the CalledProcessError is
    logged and not re-raised.

    Other failures are not handled here and propagate to the caller, e.g.
    the OSError raised by subprocess when the executable cannot be started.

    Requires WKHTMLTOPDF_LOCATION (directory containing the wkhtmltopdf
    executable) and WKHTMLTOPDF_OUTPUT_DIR (where you want PDFs to end up)
    in the Django settings file.
    """
    # More info here:
    # http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html
    logger.info('Starting grab of %s to %s', url, outfile)

    # The command is passed as an argument list (no shell), so the URL is
    # never interpreted by a shell.
    command = [
        os.path.join(settings.WKHTMLTOPDF_LOCATION, "wkhtmltopdf"),
        "--quiet",
        "--javascript-delay", "400",
        "--title", "%s" % url,
        "--lowquality",
        "--disable-plugins",
        # Optional, currently disabled: "--load-error-handling", "ignore",
        url,
        os.path.join(settings.WKHTMLTOPDF_OUTPUT_DIR, outfile),
    ]

    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError as e:
        # Non-zero exit status from wkhtmltopdf: record it and report failure.
        logger.error(e)
        return False
    else:
        logger.info('Ending grab of %s to %s successful', url, outfile)
        return True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment