Skip to content

Instantly share code, notes, and snippets.

@spaceemotion
Created July 23, 2020 23:49
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save spaceemotion/6a1c16f1a8918a728227e7da18521b43 to your computer and use it in GitHub Desktop.
Save spaceemotion/6a1c16f1a8918a728227e7da18521b43 to your computer and use it in GitHub Desktop.
Custom Browsershot browser.js to run on AWS Lambda via Laravel Vapor
const chromium = require('chrome-aws-lambda');
const fs = require('fs');
const URL = require('url').URL;
const URLParse = require('url').parse;
// Skip the node binary and the script path; what remains was supplied by Browsershot.
const args = process.argv.slice(2);

/**
 * Decodes the request Browsershot hands to this script.
 *
 * Browsershot can talk to puppeteer in two ways:
 * - inline: the first argument is the options JSON dump itself, or
 * - via file: the first argument is `-f <url>`, where <url> locates a
 *   temporary file that contains the options JSON dump.
 *
 * @param {string} argument - First command-line argument given to the script.
 * @returns {*} The parsed options JSON.
 */
const readRequest = (argument) => {
  if (!argument.startsWith('-f ')) {
    return JSON.parse(argument);
  }

  const optionsFile = new URL(argument.substring(3));
  return JSON.parse(fs.readFileSync(optionsFile));
};

const request = readRequest(args[0]);

// URLs of the requests seen by the page; serialized for the `requestsList` action.
const requestsList = [];
/**
 * Produces the result of the requested Browsershot action.
 *
 * - `requestsList`: returns the collected request list as a JSON string.
 * - `evaluate`: returns whatever `page.evaluate` yields for
 *   `request.options.pageFunction`.
 * - anything else: calls the page method with that name, passing
 *   `request.options`, and returns the result run through `toString('base64')`
 *   (so a Buffer becomes base64 text, while a plain string is returned as is).
 *
 * @param {object} page - Puppeteer page (or compatible object) to act on.
 * @param {{action: string, options: object}} request - Decoded Browsershot request.
 * @returns {Promise<*>} The action's output.
 * @throws {Error} When `request.action` does not name a method of `page`.
 */
const getOutput = async (page, request) => {
  const { action, options } = request;

  if (action === 'requestsList') {
    return JSON.stringify(requestsList);
  }

  if (action === 'evaluate') {
    return page.evaluate(options.pageFunction);
  }

  // Fail with a readable message instead of "page[...] is not a function".
  if (typeof page[action] !== 'function') {
    throw new Error(`Unsupported Browsershot action: ${action}`);
  }

  const output = await page[action](options);
  return output.toString('base64');
};
/**
 * Lets go of the browser at the end of a run.
 *
 * A remote instance is only detached from (after closing the page this run
 * opened on it); a locally launched instance is closed.
 *
 * @param {object} browser - Puppeteer browser handle.
 * @param {object|undefined} page - Page opened by this run, if one was created.
 * @param {boolean} remoteInstance - True when `browser` was obtained via `connect`.
 * @returns {Promise<void>}
 */
const releaseBrowser = async (browser, page, remoteInstance) => {
  if (!remoteInstance) {
    await browser.close();
    return;
  }

  if (page) {
    await page.close();
  }
  await browser.disconnect();
};

/**
 * Enables request interception and installs ONE listener that records every
 * request URL in `requestsList` and then either aborts or continues it.
 *
 * A single listener is used on purpose: each intercepted request must be
 * answered exactly once, so the image / domain / URL block rules are combined
 * into one decision instead of being spread over several listeners.
 *
 * @param {object} page - Puppeteer page to intercept requests on.
 * @param {object} options - Browsershot options (`disableImages`, and the
 *   JSON-encoded `blockDomains` / `blockUrls` lists).
 * @returns {Promise<void>}
 */
const interceptRequests = async (page, options) => {
  await page.setRequestInterception(true);

  const blockedDomains = options.blockDomains ? JSON.parse(options.blockDomains) : [];
  const blockedUrls = options.blockUrls ? JSON.parse(options.blockUrls) : [];

  page.on('request', (interceptedRequest) => {
    const url = interceptedRequest.url();
    requestsList.push({ url });

    const isBlockedImage = Boolean(options.disableImages)
      && interceptedRequest.resourceType() === 'image';
    // URLs without an authority part (data:, about:, ...) parse to a null hostname.
    const hostname = URLParse(url).hostname || '';
    const isBlockedDomain = blockedDomains.some((domain) => hostname.includes(domain));
    const isBlockedUrl = blockedUrls.some((fragment) => url.includes(fragment));

    if (isBlockedImage || isBlockedDomain || isBlockedUrl) {
      interceptedRequest.abort();
      return;
    }

    interceptedRequest.continue();
  });
};

/**
 * Runs the Browsershot request end to end: obtains a browser (connecting to a
 * remote instance when configured, otherwise launching the bundled Chromium),
 * prepares the page, navigates, performs the requested interactions and
 * prints the action's output to stdout unless `options.path` is set.
 *
 * On failure the error is written to stderr and the process exits with code 2
 * when the requested selector was not found, and with code 1 otherwise.
 *
 * @returns {Promise<void>}
 */
const callChrome = async () => {
  let browser;
  let page;
  let remoteInstance = false;

  try {
    const { options } = request;

    if (options.remoteInstanceUrl || options.browserWSEndpoint) {
      const connectOptions = {
        ignoreHTTPSErrors: options.ignoreHttpsErrors,
      };

      // Only one connection method is used; the instance URL takes precedence.
      if (options.remoteInstanceUrl) {
        connectOptions.browserURL = options.remoteInstanceUrl;
      } else {
        connectOptions.browserWSEndpoint = options.browserWSEndpoint;
      }

      try {
        browser = await chromium.puppeteer.connect(connectOptions);
        remoteInstance = true;
      } catch (exception) {
        // Deliberate best effort: fall back to launching a Chromium instance below.
      }
    }

    if (!browser) {
      browser = await chromium.puppeteer.launch({
        ignoreHTTPSErrors: options.ignoreHttpsErrors,
        executablePath: await chromium.executablePath,
        args: [...chromium.args, ...(options.args || [])],
      });
    }

    page = await browser.newPage();

    if (options.disableJavascript) {
      await page.setJavaScriptEnabled(false);
    }

    await interceptRequests(page, options);

    if (options.dismissDialogs) {
      page.on('dialog', async (dialog) => {
        await dialog.dismiss();
      });
    }

    if (options.userAgent) {
      await page.setUserAgent(options.userAgent);
    }

    if (options.device) {
      await page.emulate(chromium.puppeteer.devices[options.device]);
    }

    if (options.emulateMedia) {
      await page.emulateMediaType(options.emulateMedia);
    }

    if (options.viewport) {
      await page.setViewport(options.viewport);
    }

    if (options.extraHTTPHeaders) {
      await page.setExtraHTTPHeaders(options.extraHTTPHeaders);
    }

    if (options.authentication) {
      await page.authenticate(options.authentication);
    }

    if (options.cookies) {
      await page.setCookie(...options.cookies);
    }

    if (options.timeout) {
      await page.setDefaultNavigationTimeout(options.timeout);
    }

    const requestOptions = {};
    if (options.networkIdleTimeout) {
      requestOptions.waitUntil = 'networkidle';
      requestOptions.networkIdleTimeout = options.networkIdleTimeout;
    } else if (options.waitUntil) {
      requestOptions.waitUntil = options.waitUntil;
    }

    await page.goto(request.url, requestOptions);

    if (options.disableImages) {
      // Image requests were aborted above; also drop the <img> elements themselves.
      await page.evaluate(() => {
        let images = document.getElementsByTagName('img');
        while (images.length > 0) {
          images[0].parentNode.removeChild(images[0]);
        }
      });
    }

    if (options.types) {
      for (const typeOptions of options.types) {
        await page.type(typeOptions.selector, typeOptions.text, {
          delay: typeOptions.delay,
        });
      }
    }

    if (options.selects) {
      for (const selectOptions of options.selects) {
        await page.select(selectOptions.selector, selectOptions.value);
      }
    }

    if (options.clicks) {
      for (const clickOptions of options.clicks) {
        await page.click(clickOptions.selector, {
          button: clickOptions.button,
          clickCount: clickOptions.clickCount,
          delay: clickOptions.delay,
        });
      }
    }

    if (options.addStyleTag) {
      await page.addStyleTag(JSON.parse(options.addStyleTag));
    }

    if (options.addScriptTag) {
      await page.addScriptTag(JSON.parse(options.addScriptTag));
    }

    if (options.delay) {
      // NOTE(review): page.waitFor is believed to be deprecated in newer
      // Puppeteer releases - confirm against the bundled version before upgrading.
      await page.waitFor(options.delay);
    }

    if (options.selector) {
      const element = await page.$(options.selector);
      if (element === null) {
        // Kept as a plain object so the stderr output stays as before;
        // the catch block maps this type to exit code 2.
        throw {type: 'ElementNotFound'};
      }
      // Restrict the output to the element by clipping to its bounding box.
      options.clip = await element.boundingBox();
    }

    if (options.function) {
      await page.waitForFunction(options.function, {
        polling: options.functionPolling,
        timeout: options.functionTimeout || options.timeout,
      });
    }

    const output = await getOutput(page, request);
    if (!options.path) {
      console.log(output);
    }

    await releaseBrowser(browser, page, remoteInstance);
  } catch (exception) {
    if (browser) {
      try {
        // Must finish before process.exit below, or Chromium may be left running.
        await releaseBrowser(browser, page, remoteInstance);
      } catch (cleanupException) {
        // A failed cleanup must not mask the original error or its exit code.
        console.error(cleanupException);
      }
    }

    console.error(exception);

    if (exception.type === 'ElementNotFound') {
      process.exit(2);
    }
    process.exit(1);
  }
};

callChrome();
@valentinfily
Copy link

Has anyone managed to make it work on Laravel Vapor with Docker?
I was using Lambda layers pretty successfully until recently, but Vapor requires moving to Docker images for future updates.

Until now I was using these layers in my vapor.yml file:

layers:      
  - "arn:aws:lambda:eu-west-3:553035198032:layer:nodejs12:37"
  - "vapor:php-7.4"
  - "arn:aws:lambda:eu-west-3:764866452798:layer:chrome-aws-lambda:20"

My Dockerfile (basically trying to replicate the behaviour of the layers) looks like this:

# Build on the Vapor PHP 7.4 base image.
FROM laravelphp/vapor:php74

# Tools needed below to download and unpack the layer archives.
RUN apk update && apk add --no-cache curl unzip

# Work inside /opt while the archives are unpacked.
WORKDIR /opt

# Download the chrome-aws-lambda layer archive.
RUN curl -sSL https://github.com/shelfio/chrome-aws-lambda-layer/raw/master/chrome_aws_lambda.zip --output chrome_aws_lambda.zip

# Unpack it directly into /opt.
RUN unzip chrome_aws_lambda.zip -d /opt

RUN rm chrome_aws_lambda.zip

# Download the node-custom-lambda v12.x layer archive.
RUN curl -sSL https://github.com/lambci/node-custom-lambda/raw/master/v12.x/layer.zip --output node-custom-lambda.zip

# Unpack it into its own directory so only its bin/ has to be copied over.
RUN unzip node-custom-lambda.zip -d /opt/node-custom-lambda

# Copy the unpacked bin directory to /opt/bin.
RUN cp -avr /opt/node-custom-lambda/bin /opt/bin

# Remove the downloaded archive and the temporary unpack directory.
RUN rm node-custom-lambda.zip && rm -rf node-custom-lambda

WORKDIR /

# Add the application code.
COPY . /var/task

@spaceemotion
Copy link
Author

Pretty sure you wouldn't need a chrome-aws-lambda setup when using the docker image variant at all.
chrome-aws-lambda only exists because of the ~200 MB(?) package size limit of Lambda, which for Docker images is 10 GB, if I'm not mistaken...

You should be able to install chrome as usual and use browsershot like any other virtual server.

That being said; actually installing chrome and getting everything to run is a different matter entirely. There are many dependencies that you need that don't seem to exist in the amazonlinux system...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment