anderser/grabber.py

## grabber.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import subprocess
import logging
from django.conf import settings

# Module-level logger; records from this file are emitted under the
# 'pagetracker.grabber' logger name.
logger = logging.getLogger('pagetracker.grabber')

def grab_page(url, outfile):
    """
    Grab a web page to PDF using wkhtmltopdf.

    Requires two Django settings: WKHTMLTOPDF_LOCATION (directory that
    contains the ``wkhtmltopdf`` executable) and WKHTMLTOPDF_OUTPUT_DIR
    (directory the PDF is written to).

    Arguments:
        url: address of the page to grab; also passed as the PDF title.
        outfile: output file name, joined onto WKHTMLTOPDF_OUTPUT_DIR.

    Returns:
        True if wkhtmltopdf exits with status 0, False if it exits with a
        non-zero status (the CalledProcessError is logged, not re-raised).

    Raises:
        OSError: not caught here; subprocess raises it when the executable
            cannot be started (e.g. wrong WKHTMLTOPDF_LOCATION).
    """
    # More info here: http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html

    logger.info('Starting grab of %s to %s', url, outfile)

    # Argument list (not a shell string), so the URL is never shell-parsed.
    command = [
        os.path.join(settings.WKHTMLTOPDF_LOCATION, "wkhtmltopdf"),
        "--quiet",
        "--javascript-delay", "400",
        "--title", "%s" % url,
        "--lowquality",
        "--disable-plugins",
        #"--load-error-handling", "ignore",
        url,
        os.path.join(settings.WKHTMLTOPDF_OUTPUT_DIR, outfile),
    ]

    try:
        # check_call raises CalledProcessError on a non-zero exit status.
        subprocess.check_call(command)
    except subprocess.CalledProcessError as e:
        # "as" form is valid on Python 2.6+ and Python 3; the old
        # "except X, e" spelling is a SyntaxError on Python 3.
        logger.error(e)
        return False
    else:
        logger.info('Ending grab of %s to %s successful', url, outfile)
        return True
	#!/usr/bin/python
	# -*- coding: utf-8 -*-
	import os
	import subprocess
	import logging
	from django.conf import settings

	# Module-level logger; records from this file are emitted under the
	# 'pagetracker.grabber' logger name.
	logger = logging.getLogger('pagetracker.grabber')

	def grab_page(url, outfile):
		"""
		Grab a web page to PDF using wkhtmltopdf.

		Requires two Django settings: WKHTMLTOPDF_LOCATION (directory that
		contains the ``wkhtmltopdf`` executable) and WKHTMLTOPDF_OUTPUT_DIR
		(directory the PDF is written to).

		Arguments:
			url: address of the page to grab; also passed as the PDF title.
			outfile: output file name, joined onto WKHTMLTOPDF_OUTPUT_DIR.

		Returns:
			True if wkhtmltopdf exits with status 0, False if it exits with a
			non-zero status (the CalledProcessError is logged, not re-raised).

		Raises:
			OSError: not caught here; subprocess raises it when the executable
				cannot be started (e.g. wrong WKHTMLTOPDF_LOCATION).
		"""
		# More info here: http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html

		logger.info('Starting grab of %s to %s', url, outfile)

		# Argument list (not a shell string), so the URL is never shell-parsed.
		command = [
			os.path.join(settings.WKHTMLTOPDF_LOCATION, "wkhtmltopdf"),
			"--quiet",
			"--javascript-delay", "400",
			"--title", "%s" % url,
			"--lowquality",
			"--disable-plugins",
			#"--load-error-handling", "ignore",
			url,
			os.path.join(settings.WKHTMLTOPDF_OUTPUT_DIR, outfile),
		]

		try:
			# check_call raises CalledProcessError on a non-zero exit status.
			subprocess.check_call(command)
		except subprocess.CalledProcessError as e:
			# "as" form is valid on Python 2.6+ and Python 3; the old
			# "except X, e" spelling is a SyntaxError on Python 3.
			logger.error(e)
			return False
		else:
			logger.info('Ending grab of %s to %s successful', url, outfile)
			return True