Skip to content

Instantly share code, notes, and snippets.

@shaond
Last active June 27, 2018 11:28
Show Gist options
  • Select an option

  • Save shaond/f1d5d6250a0411675990 to your computer and use it in GitHub Desktop.

Select an option

Save shaond/f1d5d6250a0411675990 to your computer and use it in GitHub Desktop.
Node.js code to proxy an upstream webpage using cheerio
var http = require('http');
var express = require('express');
var router = express.Router();
/*
 * GET / — fetch the page named by the `url` query parameter, rewrite it with
 * proxy(), and send the rewritten HTML back with caching disabled.
 *
 * Responses:
 *   200 - rewritten HTML of the upstream page
 *   400 - `url` is missing, not a single string, or uses a non-HTTP(S) scheme
 *   502 - the upstream request failed
 *
 * NOTE(review): this endpoint fetches any host the caller names, which is a
 * server-side request forgery vector if it is reachable by untrusted clients.
 * Consider an allow-list of upstream hosts before exposing it publicly.
 */
router.get('/', function(req, res) {
  var request = require('request');
  var url = req.query.url;

  // A repeated ?url= parameter arrives as an array, and a missing one as
  // undefined; neither can be turned into a single upstream address.
  if (typeof url !== 'string' || url.trim() === '') {
    res.status(400).send('Missing required "url" query parameter.');
    return;
  }
  url = url.trim();

  if (url.indexOf('://') === -1) {
    // Bare host names ("example.com") get a default protocol and a trailing
    // slash so relative references on the page resolve against a directory.
    url = 'http://' + url;
    if (!url.match(/\/$/)) {
      url += '/';
    }
  } else if (!/^https?:\/\//i.test(url)) {
    res.status(400).send('Only http and https URLs can be proxied.');
    return;
  }

  request.get(url, function(error, response, body) {
    if (error) {
      // Always answer the client; without this the request would hang until
      // the socket times out.
      res.status(502).send('Unable to fetch the upstream page.');
      return;
    }

    var proxied = proxy(body, url);
    res.header('Cache-Control', 'no-cache, private, no-store, must-revalidate, max-stale=0, post-check=0, pre-check=0');
    res.send(proxied);
  });
});
/**
 * Resolve a possibly-relative reference against the upstream page URL.
 *
 * References that already carry a scheme ("https:", "mailto:", "data:",
 * "javascript:", ...) or are protocol-relative ("//host/x") are returned
 * untouched. Missing or empty values, and references that cannot be resolved
 * (for example because `base` is not a valid URL), are also returned as-is.
 *
 * @param {string|undefined} ref - attribute value taken from the document
 * @param {string} base - absolute URL of the page being proxied
 * @returns {string|undefined} the absolute URL, or `ref` unchanged
 */
function resolveReference(ref, base) {
  if (typeof ref !== 'string' || ref === '') {
    return ref;
  }
  if (/^(?:[a-z][a-z0-9+.\-]*:|\/\/)/i.test(ref)) {
    return ref;
  }
  try {
    return new URL(ref, base).href;
  } catch (e) {
    return ref;
  }
}

/**
 * Build a link that routes an anchor target back through the proxy endpoint.
 *
 * Only http/https targets are wrapped; other schemes (javascript:, mailto:,
 * ...) and unresolvable values are returned unchanged. The target is
 * percent-encoded so that "&" and "#" inside it stay part of the `url`
 * parameter instead of being parsed as part of the proxy link itself.
 *
 * @param {string|undefined} href - current anchor href
 * @param {string} proxyPrefix - proxy endpoint ending in "?url="
 * @param {string} base - absolute URL of the page being proxied
 * @returns {string|undefined} the proxied href, or `href` unchanged
 */
function toProxyHref(href, proxyPrefix, base) {
  if (typeof href !== 'string' || href === '') {
    return href;
  }
  var target;
  try {
    target = new URL(href, base);
  } catch (e) {
    return href;
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return href;
  }
  return proxyPrefix + encodeURIComponent(target.href);
}

/**
 * Rewrite an upstream HTML document so it can be served from this host.
 *
 * Adds a <base> element pointing at the upstream URL, makes relative
 * a/img/link/script references absolute, and, when NODE_ENV is "production"
 * or "development", appends an environment marker script to <body> and
 * routes every http(s) anchor back through the proxy endpoint.
 *
 * @param {string} html - upstream document markup
 * @param {string} url - absolute URL the markup was fetched from
 * @returns {string} the rewritten document markup
 */
function proxy(html, url) {
  var cheerio = require('cheerio');
  var $ = cheerio.load(html);

  // Set href through attr() so a quote inside `url` is escaped on
  // serialization instead of breaking out of the attribute.
  $('head').append($('<base>').attr('href', url));

  // Only elements that actually carry the attribute are touched, so nothing
  // ends up with a "...undefined" reference.
  var rewrites = [
    { selector: 'a[href]', attribute: 'href' },
    { selector: 'img[src]', attribute: 'src' },
    { selector: 'link[href]', attribute: 'href' },
    { selector: 'script[src]', attribute: 'src' }
  ];
  rewrites.forEach(function(rewrite) {
    $(rewrite.selector).each(function() {
      var element = $(this);
      element.attr(
        rewrite.attribute,
        resolveReference(element.attr(rewrite.attribute), url)
      );
    });
  });

  var environments = {
    production: {
      marker: '<script id="prod"></script>\n',
      prefix: 'http://example.com/proxy?url='
    },
    development: {
      marker: '<script id="dev"></script>\n',
      prefix: 'http://localhost:3000/proxy?url='
    }
  };
  var mode = process.env.NODE_ENV;

  // Any other NODE_ENV leaves links pointing at the upstream site and adds
  // no marker script.
  if (Object.prototype.hasOwnProperty.call(environments, mode)) {
    var settings = environments[mode];
    $('a[href]').each(function() {
      var anchor = $(this);
      anchor.attr('href', toProxyHref(anchor.attr('href'), settings.prefix, url));
    });
    $('body').append(settings.marker);
  }

  return $.html();
}
// Export the router that carries the GET '/' proxy route registered above.
module.exports = router;
@shaond
Copy link
Copy Markdown
Author

shaond commented Jul 23, 2014

This code doesn't include the routes or surrounding Express code. Use with caution as it may not suit your requirements.

@maxdignan
Copy link
Copy Markdown

This looks pretty great!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment