Skip to content

Instantly share code, notes, and snippets.

@glen-84
Created April 20, 2021 09:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glen-84/4ad04cab7ed3ec74a2e6ed04e6c12348 to your computer and use it in GitHub Desktop.
Save glen-84/4ad04cab7ed3ec74a2e6ed04e6c12348 to your computer and use it in GitHub Desktop.
Koa middleware for Rendertron.
import axios from "axios";
import type {IncomingMessage} from "node:http";
import type {Middleware} from "koa";
/**
 * Default user agent fragments identifying bots/crawlers that cope poorly
 * with pages that need JavaScript to render.
 *
 * `makeMiddleware` joins these with `|` into a case-insensitive regular
 * expression when no custom `userAgentPattern` is supplied.
 */
export const botUserAgents = [
    // spell-checker:disable
    // Search engine and SEO crawlers.
    "Baiduspider", "bingbot",
    // Link preview / embed fetchers.
    "Embedly", "facebookexternalhit", "LinkedInBot", "outbrain", "pinterest",
    "quora link preview", "rogerbot", "showyoubot",
    // Messaging and social sharing clients.
    "Slackbot", "TelegramBot", "Twitterbot", "vkShare",
    // Validators and everything else.
    "W3C_Validator", "WhatsApp"
    // spell-checker:enable
];
/**
 * A default set of file extensions (without the leading dot) for static
 * assets that do not need to be proxied.
 *
 * `makeMiddleware` turns this list into a case-insensitive pattern that
 * matches request paths ending in `.<extension>`. Each extension must appear
 * only once; a repeated entry just adds a redundant regex alternative.
 */
const staticFileExtensions = [
    "ai",
    "avi",
    "css",
    "dat",
    "dmg",
    "doc",
    "exe",
    "flv",
    "gif",
    "ico",
    "iso",
    "jpeg",
    "jpg",
    "js",
    "less",
    "m4a",
    "m4v",
    "mov",
    "mp3",
    "mp4",
    "mpeg",
    "mpg",
    "pdf",
    "png",
    "ppt",
    "psd",
    "rar",
    "rss",
    "svg",
    "swf",
    "tif",
    "torrent",
    "ttf",
    "txt",
    "wav",
    "wmv",
    "woff",
    "xls",
    "xml",
    "zip"
];
/**
 * Options accepted by `makeMiddleware`.
 *
 * Only `proxyUrl` is required; every other option falls back to a default
 * chosen inside `makeMiddleware`.
 */
export interface Options {
/**
 * Base URL of the Rendertron proxy service. Required: `makeMiddleware`
 * throws if it is missing or empty. A trailing `/` is appended when absent,
 * and the URL-encoded page URL is then added after it.
 */
proxyUrl: string;
/**
 * Regular expression tested against the request's `User-Agent` header to
 * decide whether to proxy. Defaults to a case-insensitive pattern built from
 * `botUserAgents`, a set of bots that do not perform well with pages that
 * require JavaScript.
 */
userAgentPattern?: RegExp;
/**
 * Regular expression tested against the request path; matching requests are
 * never proxied. Defaults to a case-insensitive pattern matching paths that
 * end in a typical static asset file extension.
 */
excludeUrlPattern?: RegExp;
/**
 * Force web components polyfills to be loaded and enabled by appending
 * `?wc-inject-shadydom=true` to the render URL. Defaults to false.
 */
injectShadyDom?: boolean;
/**
 * Millisecond timeout passed to the HTTP client for proxy requests.
 * Defaults to 11000 milliseconds.
 */
timeout?: number;
/**
 * If a forwarded host header is found and exactly matches one of the hosts
 * in this array, then that host will be used for the request to the
 * Rendertron server instead of the actual host of the request.
 * This is useful if this middleware is running on a different host
 * which is proxied behind the actual site, and the Rendertron server should
 * request the main site.
 * Defaults to an empty array, which disables forwarded host handling.
 */
allowedForwardedHosts?: string[];
/**
 * Header used to determine the forwarded host that should be used when
 * building the URL to be rendered. Only applicable if `allowedForwardedHosts`
 * is not empty; otherwise it is ignored.
 * Defaults to `"X-Forwarded-Host"`.
 */
forwardedHostHeader?: string;
}
/**
 * Create a new Koa middleware function that proxies requests to a
 * Rendertron bot rendering service.
 *
 * A request is proxied only when it has a `User-Agent` header matching
 * `userAgentPattern` and its path does not match `excludeUrlPattern`. All
 * other requests, and requests whose proxy call fails, are handed to the
 * next middleware.
 *
 * @param options - Middleware configuration; `proxyUrl` is required.
 * @returns The configured Koa middleware.
 * @throws Error if `options.proxyUrl` is missing or empty.
 */
export function makeMiddleware(options: Options): Middleware {
    if (!options.proxyUrl) {
        throw new Error("Must set options.proxyUrl.");
    }

    // Normalise the base URL so the encoded page URL can simply be appended.
    let {proxyUrl} = options;
    if (!proxyUrl.endsWith("/")) {
        proxyUrl += "/";
    }

    const userAgentPattern = options.userAgentPattern ?? new RegExp(botUserAgents.join("|"), "iu");
    const excludeUrlPattern =
        options.excludeUrlPattern ?? new RegExp(`\\.(${staticFileExtensions.join("|")})$`, "iu");
    const injectShadyDom = Boolean(options.injectShadyDom);

    // The Rendertron service itself has a hard limit of 10 seconds to render, so
    // let's give a little more time than that by default.
    const timeout = options.timeout ?? 11000; // Milliseconds.

    // The forwarded host header is only consulted when at least one forwarded
    // host has been allowed; `null` disables the lookup entirely.
    const allowedForwardedHosts = options.allowedForwardedHosts ?? [];
    const forwardedHostHeader = allowedForwardedHosts.length
        ? options.forwardedHostHeader ?? "X-Forwarded-Host"
        : null;

    const rendertronMiddleware: Middleware = async (ctx, next) => {
        const ua = ctx.headers["user-agent"];

        if (ua === undefined || !userAgentPattern.test(ua) || excludeUrlPattern.test(ctx.path)) {
            await next();
            return;
        }

        // `false` means forwarded host handling is disabled.
        const forwardedHost = forwardedHostHeader !== null && ctx.get(forwardedHostHeader);
        const host =
            forwardedHost !== false && allowedForwardedHosts.includes(forwardedHost)
                ? forwardedHost
                : ctx.get("host");

        const incomingUrl = `${ctx.protocol}://${host + ctx.originalUrl}`;
        let renderUrl = proxyUrl + encodeURIComponent(incomingUrl);

        if (injectShadyDom) {
            renderUrl += "?wc-inject-shadydom=true";
        }

        try {
            const response = await axios.get<IncomingMessage>(renderUrl, {
                responseType: "stream",
                timeout
            });

            // Mirror the upstream status and content type. Koa labels a stream
            // body as `application/octet-stream` when no Content-Type has been
            // set, which would mislabel the rendered page for the bot.
            ctx.status = response.status;

            const contentType: unknown = response.headers["content-type"];
            if (typeof contentType === "string") {
                ctx.set("Content-Type", contentType);
            }

            ctx.body = response.data;
        } catch (e: unknown) {
            if (axios.isAxiosError(e)) {
                // eslint-disable-next-line no-console -- Okay in this context.
                console.error(
                    `[rendertron middleware] ${e.code ?? "unknown"} error fetching ${renderUrl}`
                );
            } else {
                // Do not hide unexpected failures; log them before falling back.
                // eslint-disable-next-line no-console -- Okay in this context.
                console.error(
                    `[rendertron middleware] unexpected error fetching ${renderUrl}`,
                    e
                );
            }

            // Best effort: serve the unrendered page rather than failing the request.
            await next();
        }
    };

    return rendertronMiddleware;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment