Skip to content

Instantly share code, notes, and snippets.

@taxilian
Last active April 7, 2021 00:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taxilian/4a327593b3e0225e8b97a49eebfb1fd0 to your computer and use it in GitHub Desktop.
Save taxilian/4a327593b3e0225e8b97a49eebfb1fd0 to your computer and use it in GitHub Desktop.
My attempt at implementing a node.js server with kubernetes best practices for closing gracefully
/**
 * MongoDB clients that take part in the readiness check: the readiness probe
 * only reports "ready" when every client in this list says it is connected.
 */
const dbConnections: MongoClient[] = [];

/** Type of whatever `initMiddleware` returns. */
export type startupMiddleware = ReturnType<typeof initMiddleware>;
/** The most recently recorded unrecoverable error; unset while the process is healthy. */
let failCase: Error;

/**
 * Records an error the process cannot recover from and logs it.
 *
 * @param err The error to remember as the current failure case.
 */
export function setUnrecoverableError(err: Error) {
  failCase = err;
  const { stack: trace } = err;
  console.warn("Unrecoverable error set:", err, trace);
}
/** Set to true by shutdownPhase1; the readiness endpoint reports "not ready" once it is set */
let shutdownRequested = false;
/**
 * Interval between k8s readiness probes. shutdownPhase1 waits 1.5x this long
 * before it stops accepting connections.
 * NOTE(review): presumably must match the probe period configured in the
 * deployment manifest -- confirm.
 */
const ReadinessProbeInterval = 60; // seconds
/**
* This is how long we allow connections and requests to linger before
* we kill them if they don't close on their own
*/
const ClosingTimeout = 120; // seconds
/** One tracker per listening HTTP(S) server; consulted by the readiness probe and the shutdown phases */
const httpTrackers: ServerTracker[] = [];
/**
 * Sums the open-connection counts of every registered server tracker.
 *
 * @returns A promise resolving to the total number of open connections.
 */
async function getConnectionCount() {
  return httpTrackers.reduce(
    (total, tracker) => total + tracker.connectionCount,
    0,
  );
}
/**
 * Polled while shutting down: once no connections remain open, finishes the
 * shutdown.
 *
 * `getConnectionCount()` returns a Promise, so its result has to be awaited;
 * negating the Promise itself is always false and would never trigger
 * `finishShutdown()`.
 */
async function shutdownConnectionWatch() {
  const openConnections = await getConnectionCount();
  if (openConnections === 0) {
    // finishShutdown handles its own errors, so it is safe to fire and forget
    void finishShutdown();
  }
}
/**
 * Second shutdown phase: refuse new connections and let the existing ones
 * drain, force-closing whatever is still open after `ClosingTimeout` seconds.
 *
 * NOTE(review): `requestShutdown()` destroys every socket the tracker
 * considers idle, and the tracker only marks a socket busy on a 'request'
 * event. If upgraded (websocket) sockets never emit 'request', they may be
 * destroyed here before `closeWebsockets()` can notify them -- confirm.
 */
function shutdownPhase2() {
  global.isShutdown = true;

  // Stop listening and drop idle keepalive connections on every server
  httpTrackers.forEach((tracker) => {
    tracker.requestShutdown();
  });

  // Ask websocket clients to reconnect elsewhere, then close them
  closeWebsockets();

  // Poll once per second; as soon as every connection is gone we can finish.
  // Neither timer is allowed to keep the process alive by itself.
  const pollTimer = setInterval(shutdownConnectionWatch, 1000);
  pollTimer.unref();

  // Hard deadline: past this point remaining connections are simply killed
  const deadlineMs = ClosingTimeout * 1000;
  const deadlineTimer = setTimeout(() => {
    httpTrackers.forEach((tracker) => {
      tracker.forceClose();
    });
    finishShutdown();
  }, deadlineMs);
  deadlineTimer.unref();
}
/**
 * Sends every connected websocket client a "reconnect" message and schedules
 * the socket to be closed one second later. Apps that are unset or expose no
 * `getWss` are skipped.
 */
function closeWebsockets() {
  const reconnectNotice = JSON.stringify({msg: 'recon'});
  for (const app of [examtoolsApp, hamstudyApp]) {
    if (!app?.getWss) {
      continue;
    }
    const socketServer = app.getWss();
    for (const client of socketServer.clients) {
      client.send(reconnectNotice);
      // Give the client a moment to act on the notice before closing
      const closeTimer = setTimeout(() => client.close(), 1000);
      closeTimer.unref();
    }
  }
}
/**
 * First shutdown phase: flags the process as "needs to shut down", which makes
 * the readiness endpoint start returning "not ready", then schedules
 * `shutdownPhase2` after 1.5x the readiness probe interval so traffic has time
 * to stop arriving.
 *
 * Safe to call more than once (e.g. when the signal is delivered repeatedly):
 * only the first call schedules the next phase, so the phase-2 timers are
 * never duplicated.
 */
function shutdownPhase1() {
  if (shutdownRequested) {
    // Shutdown is already under way; do not schedule phase 2 a second time
    return;
  }
  shutdownRequested = true;
  // Wait ReadinessProbeInterval * 1.5 to give plenty of time for new requests
  // to stop being routed to us before the next phase
  setTimeout(shutdownPhase2, ReadinessProbeInterval * 1.5 * 1000);
}
/** Router carrying the readiness and liveness probe endpoints. */
const probeRouter = express.Router();

/**
 * Readiness probe. Responds 500 while shutting down or after an unrecoverable
 * error, while any registered database is disconnected, or while no HTTP
 * server is registered / any registered server is not listening. Otherwise
 * responds "ready".
 */
probeRouter.get('/api/probe/ready', (req, res) => {
  // Shutdown requests and fail cases both make us "unready" immediately
  // (a fail case additionally fails the liveness probe)
  if (failCase || shutdownRequested) {
    return res.status(500).send("Service is closing");
  }

  const databasesUp = dbConnections.every((client) => client.isConnected());
  const serversUp = httpTrackers.length > 0
    && httpTrackers.every((tracker) => tracker.isListening);

  if (!databasesUp) {
    return res.status(500).send("Databases not (yet?) connected");
  }
  if (!serversUp) {
    return res.status(500).send("HTTP server(s) not ready");
  }
  return res.send("ready");
});
/**
 * Liveness probe. Responds 500 once an unrecoverable error has been recorded;
 * otherwise responds 200 "alive".
 *
 * The healthy path must send a response: without one the request hangs until
 * the probe times out, which the prober counts as a failure.
 */
probeRouter.get('/api/probe/live', (req, res) => {
  if (failCase) {
    // There was an unrecoverable error
    return res.status(500).send(`Unrecoverable error: ${failCase.message}`);
  }
  return res.send("alive");
});
// NOTE(review): neither of these is referenced anywhere in the visible code;
// presumably they bound how long a lost database connection is tolerated
// before the process is declared failed -- confirm against the rest of the app.
const connectionLostTimeout = 120; // seconds
let connectionLostTimer: ReturnType<typeof setTimeout>;
/********
* Make sure you add a MongoClient for each mongodb database connection to
`dbConnections` so that it is considered as part of the readiness check.
* Make sure you do `httpTrackers.push(new ServerTracker(server))` for every
http.Server you are listening on. For the vast majority of all apps
there is only one, but my app has two
* Make sure you add `app.use(probeRouter)` pretty much as early as you reasonably
can after creating your express app; that way your liveness and readiness probes
have as little impact on performance as possible because they'll avoid middleware
* I recommend adding a check in your uncaught exception callback to catch
any unrecoverable errors, such as EADDRINUSE. See below...
********/
/**
 * Last-resort handler for uncaught exceptions. EADDRINUSE is recorded as an
 * unrecoverable error; every exception is logged.
 *
 * Anything can be thrown, including null/undefined, so `err` is only
 * dereferenced through optional chaining -- the handler itself must not throw.
 */
process.on('uncaughtException', function (err) {
  const errorCode = (err as NodeJS.ErrnoException | undefined)?.code;
  if (errorCode === 'EADDRINUSE') {
    setUnrecoverableError(err);
  }
  console.warn("Uncaught exception: ", err, err?.stack);
});
/** True once finishShutdown has begun; makes repeated calls no-ops. */
let finishShutdownStarted = false;

/**
 * Final shutdown step: closes the database connection and arms a 5 second
 * watchdog that force-kills the process if it has not exited by then.
 *
 * This can be reached several times (from the once-per-second connection
 * watch and from the closing-timeout handler), so only the first call does
 * any work.
 *
 * If closing the connection fails, the error is logged and the process exits
 * with code 1.
 */
async function finishShutdown() {
  if (finishShutdownStarted) {
    return;
  }
  finishShutdownStarted = true;

  console.log("Shutting down...");
  try {
    // Close any database connections or similar that you have open
    await mongoose.connection?.close();
    console.log("Application stopped, as long as all running tasks are stopped");
    // At this point it should be shut down within 5 seconds; if it isn't then
    // we just kill it. The watchdog is unref'd so that it cannot itself keep
    // the process alive -- otherwise a clean exit would be impossible and
    // every shutdown would end in forceShutdown.
    setTimeout(forceShutdown, 5000).unref();
  } catch (err) {
    console.warn("Error shutting down: ", err);
    process.exit(1);
  }
}
// Begin the graceful shutdown sequence on a termination request. SIGINT covers
// Ctrl+C in a terminal; SIGTERM is the signal Kubernetes sends to a container
// by default when its pod is being terminated.
process.on('SIGINT', shutdownPhase1);
process.on('SIGTERM', shutdownPhase1);
/**
 * Hard-stops the process after a graceful shutdown failed to complete.
 * Always exits with code 1; the `code` argument is not used.
 */
function forceShutdown(code: any) {
console.log("Failed to shut down gracefully, shutting down hard now");
// If we hit this, dump out what was still keeping the process alive
wtfDump();
process.exit(1);
process.abort(); // probably overkill =]
}
import { Socket } from 'net';
import http from 'http';
import https from 'https';
type WebServer = http.Server | https.Server;
type ConnID = number;

declare module 'net' {
  // Augment the Socket interface with the metadata ServerTracker attaches
  interface Socket {
    /** Tracker-assigned connection ID; not set until the tracker first sees the socket */
    $$id: ConnID;
    /** True while no request is in flight on this socket */
    $$idle: boolean;
  }
}

let nextId = 0;

/** Returns a fresh connection ID. IDs count up from 0. */
function getNextId(): ConnID {
  return nextId++;
}

/**
 * Tracks the open sockets of an HTTP(S) server so the server can be shut down
 * gracefully: idle keepalive connections are closed right away, busy ones as
 * soon as their current response finishes.
 */
class ServerTracker {
  /** Once true, sockets are destroyed as soon as their response finishes */
  isShuttingDown = false;
  /** Every currently open socket, keyed by its tracker-assigned ID */
  connections = new Map<ConnID, Socket>();

  /**
   * @param server The server whose 'connection' and 'request' events are tracked.
   */
  constructor(public server: WebServer) {
    server.on('connection', (sock: Socket) => {
      this.onConnection(sock);
    });
    server.on('request', (req: http.IncomingMessage, res: http.ServerResponse) => {
      this.onRequest(req, res);
    });
  }

  /**
   * The number of currently open connections (including idle connections)
   */
  get connectionCount(): number {
    return this.connections.size;
  }

  /**
   * The number of connections which are not idle
   */
  get activeConnectionCount(): number {
    let active = 0;
    for (const sock of this.connections.values()) {
      if (!sock.$$idle) {
        active += 1;
      }
    }
    return active;
  }

  /** Whether the underlying server is currently listening */
  get isListening(): boolean {
    return this.server.listening;
  }

  /**
   * Stops accepting requests, closes any idle (keepalive)
   * connections, and starts closing connections as soon
   * as requests finish
   */
  requestShutdown(): void {
    this.stopListening();
    // From now on sockets are closed as soon as they become idle
    this.isShuttingDown = true;
    // Close any idle (keepalive) connections
    for (const sock of this.connections.values()) {
      if (sock.$$idle) {
        sock.destroy();
      }
    }
  }

  /**
   * Force-closes all remaining connections; this is destructive! Do not
   * do this until you're confident that everything is
   * totally ready to be closed
   */
  forceClose(): void {
    // Just in case the caller went straight here without requestShutdown()
    this.stopListening();
    this.isShuttingDown = true;
    for (const sock of this.connections.values()) {
      sock.destroy();
    }
  }

  /**
   * Starts tracking a socket if it is not tracked yet.
   *
   * @param sock The socket to track.
   * @returns The same socket, now carrying `$$id` and `$$idle`.
   */
  onConnection(sock: Socket): Socket {
    // IDs start at 0, so "not tracked yet" has to be detected by the ID being
    // absent. A truthiness test would treat the socket with ID 0 as untracked,
    // register it a second time, and leave its first map entry behind forever.
    if (typeof sock.$$id !== 'number') {
      const id = getNextId();
      sock.$$id = id;
      sock.$$idle = true;
      this.connections.set(id, sock);
      sock.on('close', () => {
        this.connections.delete(id);
      });
    }
    return sock;
  }

  /**
   * Marks the request's socket busy until the response finishes. During
   * shutdown the socket is destroyed at that point instead of being kept
   * alive for another request.
   *
   * @param req The incoming request.
   * @param res The response whose 'finish' event marks the socket idle again.
   */
  onRequest(req: http.IncomingMessage, res: http.ServerResponse): void {
    const sock = this.onConnection(req.socket);
    sock.$$idle = false;
    res.on('finish', () => {
      sock.$$idle = true;
      if (this.isShuttingDown) {
        // If we're shutting down then close the connection,
        // don't allow keepalive
        sock.destroy();
      }
    });
  }

  /** Stops the server from accepting new connections if it is still listening. */
  private stopListening(): void {
    if (this.server.listening) {
      this.server.close();
    }
  }
}
export default ServerTracker;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment