Skip to content

Instantly share code, notes, and snippets.

@jroakes
Created February 2, 2019 18:37
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jroakes/f5b340ba6a054a0540e7f0ad4f7370fa to your computer and use it in GitHub Desktop.
Save jroakes/f5b340ba6a054a0540e7f0ad4f7370fa to your computer and use it in GitHub Desktop.
const chromium = require('chrome-aws-lambda');
const puppeteer = require('puppeteer-core');
const extractor = require('unfluff');
const summarize = require('summarize');
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
const done = (err, res) => callback(null, {
statusCode: err ? '503' : '200',
body: err ? err : JSON.stringify(res),
headers: {
'Content-Type': 'application/json'
},
});
function getUA(){
const uastrings = ["Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36"
];
return uastrings[Math.round( Math.random() * (uastrings.length-1), 0 )];
}
try {
var data = {};
const path = unescape(event.pathParameters.proxy);
const mobile = false;
var browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath,
headless: chromium.headless,
});
let page = await browser.newPage();
page.setUserAgent(getUA());
page.setViewport({ width: 1000, height: 1000, mobile });
await page.goto(path, { waitUntil : 'domcontentloaded' } );
let content = await page.content();
let ext = extractor(content);
let stats = summarize(content)
data = Object.assign(data, stats, ext);
} catch (error) {
//return done(error);
return done(false, error.message + '\n\n' + JSON.stringify(event));
} finally {
if (browser !== null) {
await browser.close();
}
}
return done(false, data)
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment