Skip to content

Instantly share code, notes, and snippets.

@Xowap
Last active July 9, 2018 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Xowap/27979b779d971e8109f52997886434c4 to your computer and use it in GitHub Desktop.
Save Xowap/27979b779d971e8109f52997886434c4 to your computer and use it in GitHub Desktop.
Fix pyppeteer download location
"""
This is a monkey patch of the requests-html module so it can download chrome
and store its data in a custom location and not in the home directory (which
is not writable on servers and also we do not want chrome stored there).
"""
from os.path import (
join,
)
from pathlib import (
Path,
)
from sys import (
modules,
)
from django.conf import (
settings,
)
def clean_modules():
    """
    Unload every requests_html/pyppeteer module from ``sys.modules``.

    Called before patching, so that the libraries get imported afresh and
    can be overridden, and again once patching is done, so that imports made
    elsewhere afterwards get the regular, unpatched modules.

    Only the two top-level packages and their dotted sub-modules are
    removed; unrelated modules whose name merely starts with the same
    letters (e.g. ``pyppeteerish``) are left alone.
    """

    packages = ('requests_html', 'pyppeteer')
    sub_module_prefixes = ('requests_html.', 'pyppeteer.')

    # Iterate over a snapshot of the keys: sys.modules can change size while
    # being iterated (e.g. an import running in another thread), which would
    # raise RuntimeError on the live view, and we mutate it ourselves below.
    for name in tuple(modules):
        if name in packages or name.startswith(sub_module_prefixes):
            # pop() instead of del so that an entry which disappeared since
            # the snapshot was taken is not an error.
            modules.pop(name, None)
def set_download_paths(m):
    """
    Redirect the download locations of a chromium downloader module.

    ``m`` is the module object to patch (this file passes
    ``pyppeteer.chromium_downloader``). It must expose a ``REVISION``
    attribute; its ``DOWNLOADS_FOLDER`` and ``chromiumExecutable``
    attributes are overwritten so that everything lives below
    ``settings.PYPPETEER_DIR / 'local-chromium'``.
    """

    downloads_folder = Path(settings.PYPPETEER_DIR) / 'local-chromium'
    m.DOWNLOADS_FOLDER = downloads_folder

    # Every executable lives below <downloads folder>/<revision>.
    revision_dir = downloads_folder / m.REVISION

    # Both Windows flavours point to the same 'chrome-win32' location.
    windows_chrome = revision_dir / 'chrome-win32' / 'chrome.exe'

    m.chromiumExecutable = {
        'linux': revision_dir / 'chrome-linux' / 'chrome',
        'mac': revision_dir.joinpath(
            'chrome-mac', 'Chromium.app', 'Contents', 'MacOS', 'Chromium',
        ),
        'win32': windows_chrome,
        'win64': windows_chrome,
    }
def make_html_session():
    """
    Build an ``HTMLSession`` class backed by a patched pyppeteer.

    Applies a series of monkey patches to the pyppeteer module so that it
    downloads its files to, and stores the browser user data in,
    ``settings.PYPPETEER_DIR``. Returns the ``HTMLSession`` class imported
    from requests_html while those patches were in place.

    The patched modules are removed from ``sys.modules`` before returning,
    and also when one of the imports or patches fails, so that importing the
    libraries for real (and not through this module) gives the default
    behaviour.
    """

    # Start from a clean slate so the imports below load fresh module
    # objects for us to patch.
    clean_modules()

    try:
        from pyppeteer import chromium_downloader
        set_download_paths(chromium_downloader)

        import pyppeteer
        original_launch = pyppeteer.launch

        def fixed_launch(*args, **kwargs):
            # Always force the user data directory into the project folder,
            # overriding whatever the caller may have passed.
            kwargs['userDataDir'] = join(settings.PYPPETEER_DIR, 'data')
            return original_launch(*args, **kwargs)

        pyppeteer.launch = fixed_launch

        # Imported while the patched pyppeteer is still registered in
        # sys.modules.
        from requests_html import HTMLSession
    finally:
        # Runs on failure too: never leave patched modules behind for the
        # rest of the process to import by accident.
        clean_modules()

    return HTMLSession
# Build the patched class once, when this module is imported, and expose it
# under the usual name.
HTMLSession = make_html_session()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment