Skip to content

Instantly share code, notes, and snippets.

@jpbalarini
Created January 22, 2019 14:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpbalarini/e9e5342559626362118abe65341fc55f to your computer and use it in GitHub Desktop.
Save jpbalarini/e9e5342559626362118abe65341fc55f to your computer and use it in GitHub Desktop.
Puppeteer downloader server
const puppeteer = require('puppeteer')
const express = require('express')
const bodyParser = require('body-parser')
const crypto = require('crypto')
const fs = require('fs')
// TCP port the HTTP server listens on.
const port = 3000;

// Express application with the body-parser JSON middleware installed.
const app = express();
app.use(bodyParser.json());
/**
 * Loads `url` in a headless browser and stores two artifacts under `pages/`:
 * a PNG screenshot and the rendered HTML. Both files are named after the MD5
 * hex digest of the URL (`pages/<md5>.png`, `pages/<md5>.html`).
 *
 * Failures are logged and swallowed rather than thrown, so callers always get
 * a tuple back; only an error while closing the browser can reject.
 *
 * NOTE(review): `url` is handed to `page.goto` without validation. If it can
 * come from untrusted clients, restrict the allowed schemes/hosts upstream.
 *
 * @param {string} url - Address of the page to render.
 * @returns {Promise<[string, string]>} `[filename, html]`. `filename` is ''
 *   when the job failed before the hash was computed; `html` is '' when it
 *   failed before the page content was read.
 */
const runJob = async (url) => {
  let browser;
  let filename = '';
  let html = '';
  try {
    browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    // setViewport is asynchronous; wait for it so the page is not loaded
    // (and captured) with the default viewport.
    await page.setViewport({ width: 1280, height: 1024 });
    // consider navigation to be finished when there are no more than 2
    // network connections for at least 500 ms
    await page.goto(url, { waitUntil: 'networkidle2' });
    filename = crypto.createHash('md5').update(url).digest('hex');
    // Make sure the output directory exists before anything is written to it.
    fs.mkdirSync('pages', { recursive: true });
    await page.screenshot({ path: 'pages/' + filename + '.png' });
    html = await page.content();
    // Synchronous write: it takes no callback, failures are thrown and end up
    // in the catch block below.
    fs.writeFileSync('pages/' + filename + '.html', html);
  } catch (err) {
    console.log('There was an error: ' + err.message);
  } finally {
    // Always release the browser process, even after a failure.
    if (browser) {
      await browser.close();
    }
  }
  return [filename, html];
};
/**
 * GET /get_page
 *
 * Renders the page identified by `url` (taken from the JSON body or, failing
 * that, from the query string) via runJob and responds with the HTML that
 * runJob returns.
 *
 * Responds 400 when no usable `url` is supplied. Unexpected rejections are
 * passed to the error middleware through `next`.
 */
app.get('/get_page', async (req, res, next) => {
  const url = (req.body && req.body.url) || req.query.url;
  if (typeof url !== 'string' || url === '') {
    // Do not start a browser for a request that cannot succeed.
    res.status(400).send('400: Missing "url" parameter');
    return;
  }
  console.log('Got URL: ' + url);
  try {
    const [id, body] = await runJob(url);
    console.log('Processed ID: ' + id + ' for URL: ' + url);
    // res.send is synchronous from the handler's point of view; nothing to await.
    res.send(body);
  } catch (err) {
    // Express versions before 5 do not route a rejected async handler to the
    // error middleware on their own, which would leave the request hanging.
    next(err);
  }
});
// Fallback error middleware. The four-argument signature is what makes
// Express treat this function as an error handler, so `next` must stay in the
// parameter list.
app.use((err, req, res, next) => {
  if (!err) {
    return next();
  }
  // Record the failure instead of discarding it silently.
  console.log('Request failed: ' + (err.stack || err));
  if (res.headersSent) {
    // A response is already in flight; writing a new status/body would throw,
    // so let Express' default handler terminate the connection.
    return next(err);
  }
  res.status(500).send('500: Internal server error');
});
// Remove the listener-count limit on `process` (0 means unlimited), which
// silences the "possible EventEmitter memory leak" warning.
process.setMaxListeners(0);

// Creates a listener that prints `prefix` followed by the received error.
// The listeners only log; they do not terminate the process.
const logProcessError = (prefix) => (error) => {
  console.log(prefix + error);
};

process.on('uncaughtException', logProcessError('uncaughtException Error: '));
process.on('unhandledRejection', logProcessError('unhandledRejection Error: '));
// Start the HTTP server. The readiness message is printed from the listen
// callback so it only appears once the port is actually bound, not when
// binding fails (e.g. the port is already in use).
app.listen(port, () => {
  console.log('Server is listening on port ' + port);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment