Created
February 2, 2019 18:37
-
-
Save jroakes/f5b340ba6a054a0540e7f0ad4f7370fa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const chromium = require('chrome-aws-lambda'); | |
const puppeteer = require('puppeteer-core'); | |
const extractor = require('unfluff'); | |
const summarize = require('summarize'); | |
exports.handler = async (event, context, callback) => { | |
console.log('Received event:', JSON.stringify(event, null, 2)); | |
/**
 * Finishes the invocation by handing an HTTP-style response object to the
 * Lambda `callback`.
 *
 * @param {*} err - Falsy on success. A truthy value selects status '503' and
 *   is used to build the response body.
 * @param {*} [res] - Payload that is JSON-serialized into the body on success.
 * @returns {*} Whatever `callback` returns.
 */
const done = (err, res) => {
  let body;
  if (!err) {
    body = JSON.stringify(res);
  } else if (typeof err === 'string') {
    body = err;
  } else if (err instanceof Error) {
    // An Error object is not a valid body value; report its message instead.
    body = err.message;
  } else {
    body = JSON.stringify(err);
  }

  // NOTE(review): presumably consumed by an API Gateway proxy integration,
  // which expects `body` to be a string - confirm against the deployment.
  return callback(null, {
    statusCode: err ? '503' : '200',
    body,
    headers: {
      'Content-Type': 'application/json',
    },
  });
};
/**
 * Picks one desktop browser User-Agent string at random.
 *
 * @returns {string} One entry of the built-in User-Agent list, each entry
 *   being equally likely.
 */
function getUA() {
  const uastrings = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
  ];

  // floor(random * length) maps [0, 1) onto every index with equal weight.
  // Rounding random * (length - 1) instead would give the first and last
  // entries only half the probability of the others.
  const index = Math.floor(Math.random() * uastrings.length);
  return uastrings[index];
}
try { | |
var data = {}; | |
const path = unescape(event.pathParameters.proxy); | |
const mobile = false; | |
var browser = await puppeteer.launch({ | |
args: chromium.args, | |
defaultViewport: chromium.defaultViewport, | |
executablePath: await chromium.executablePath, | |
headless: chromium.headless, | |
}); | |
let page = await browser.newPage(); | |
page.setUserAgent(getUA()); | |
page.setViewport({ width: 1000, height: 1000, mobile }); | |
await page.goto(path, { waitUntil : 'domcontentloaded' } ); | |
let content = await page.content(); | |
let ext = extractor(content); | |
let stats = summarize(content) | |
data = Object.assign(data, stats, ext); | |
} catch (error) { | |
//return done(error); | |
return done(false, error.message + '\n\n' + JSON.stringify(event)); | |
} finally { | |
if (browser !== null) { | |
await browser.close(); | |
} | |
} | |
return done(false, data) | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment