Last active
April 7, 2021 00:34
-
-
Save taxilian/4a327593b3e0225e8b97a49eebfb1fd0 to your computer and use it in GitHub Desktop.
My attempt at implementing a Node.js server that follows Kubernetes best practices for shutting down gracefully
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * MongoDB clients whose connection state is consulted by the readiness
 * probe. Every client the application opens should be pushed onto this list.
 */
const dbConnections: MongoClient[] = [];

/** The type of whatever `initMiddleware` returns, exported for callers. */
export type startupMiddleware = ReturnType<typeof initMiddleware>;
/**
 * The most recently recorded unrecoverable error. It stays unset while the
 * process is healthy; the probe endpoints start failing once it is set.
 */
let failCase: Error;

/**
 * Records an error the process cannot recover from and logs it.
 *
 * @param err The fatal error; it replaces any previously recorded one.
 */
export function setUnrecoverableError(err: Error): void {
  failCase = err;
  console.warn('Unrecoverable error set:', err, err.stack);
}
/** Becomes true as soon as a graceful shutdown has been requested. */
let shutdownRequested = false;

/**
 * Seconds between Kubernetes probe checks. The shutdown sequence waits 1.5x
 * this long before it starts closing connections.
 * NOTE(review): presumably this should mirror the readiness probe period
 * configured for the pod -- confirm against the deployment manifest.
 */
const ReadinessProbeInterval = 60; // seconds

/**
 * Seconds that open connections and in-flight requests are allowed to linger
 * once the servers stop accepting new ones; whatever is still open after
 * that gets closed forcibly.
 */
const ClosingTimeout = 120; // seconds

/** One tracker for each HTTP(S) server this process listens on. */
const httpTrackers: Array<ServerTracker> = [];
/**
 * Adds up the open connections reported by every tracked server.
 *
 * @returns A promise for the combined connection count. Because this is an
 *   async function the result must be awaited before it is tested.
 */
async function getConnectionCount() {
  return httpTrackers.reduce((total, tracker) => total + tracker.connectionCount, 0);
}
/** Set once the watcher has kicked off the final shutdown, so it only does so once. */
let connectionWatchTriggered = false;

/**
 * Polled while shutting down: once no connections remain open, starts the
 * final shutdown step.
 *
 * `getConnectionCount()` returns a promise, so the count has to be awaited;
 * negating the promise itself is always false and would never trigger the
 * shutdown. Later polls are ignored after the first trigger because the
 * polling interval keeps running.
 */
async function shutdownConnectionWatch(): Promise<void> {
  if (connectionWatchTriggered) {
    return;
  }
  const openConnections = await getConnectionCount();
  if (openConnections === 0 && !connectionWatchTriggered) {
    connectionWatchTriggered = true;
    void finishShutdown();
  }
}
/**
 * Second shutdown phase: every server stops accepting new connections and
 * websocket clients are asked to reconnect. Connections that are still busy
 * are left alone until they finish or until `ClosingTimeout` expires.
 */
function shutdownPhase2() {
  global.isShutdown = true;

  // Each tracker stops listening and drops its idle connections
  httpTrackers.forEach((tracker) => {
    tracker.requestShutdown();
  });

  // Websocket clients are told to reconnect and are then closed
  closeWebsockets();

  // Poll once a second; as soon as no connections remain the watcher
  // finishes the shutdown. The timer is unref'd so it never keeps the
  // process alive by itself.
  const pollEveryMs = 1000;
  setInterval(shutdownConnectionWatch, pollEveryMs).unref();

  // Last resort: when the grace period runs out, tear down whatever is
  // still open and finish the shutdown regardless
  const forceCloseEverything = () => {
    httpTrackers.forEach((tracker) => {
      tracker.forceClose();
    });
    finishShutdown();
  };
  setTimeout(forceCloseEverything, ClosingTimeout * 1000).unref();
}
/**
 * Sends every connected websocket client a `recon` message and schedules the
 * socket to be closed one second later. Apps that are missing or expose no
 * `getWss` are skipped.
 */
function closeWebsockets() {
  const apps = [examtoolsApp, hamstudyApp];
  for (const app of apps) {
    if (app?.getWss) {
      const socketServer = app.getWss();
      for (const client of socketServer.clients) {
        // Ask the client to reconnect, then close the socket after 1s
        // in case it is still open
        client.send(JSON.stringify({msg: 'recon'}));
        setTimeout(() => client.close(), 1000).unref();
      }
    }
  }
}
/**
 * First shutdown phase: flags the process as "needs to shut down", which
 * makes the readiness endpoint answer "not ready", and schedules the second
 * phase.
 *
 * Safe to call repeatedly (for example when the signal is delivered twice):
 * only the first call schedules the second phase, so timers and websocket
 * notifications are not duplicated.
 */
function shutdownPhase1() {
  if (shutdownRequested) {
    return;
  }
  shutdownRequested = true;
  // Wait ReadinessProbeInterval * 1.5 so there is plenty of time for new
  // requests to stop being routed here before the next phase
  setTimeout(shutdownPhase2, ReadinessProbeInterval * 1.5 * 1000);
}
/** Router that carries the readiness and liveness probe endpoints. */
const probeRouter = express.Router();

/**
 * Readiness probe. Responds 500 when an unrecoverable error was recorded, a
 * shutdown was requested, a database client is not connected, or no HTTP
 * server is listening yet; otherwise responds "ready".
 */
probeRouter.get('/api/probe/ready', (req, res) => {
  const notReady = (reason: string) => res.status(500).send(reason);

  // Report "unready" from the moment shutdown is requested, and whenever
  // there is a fail case (which fails liveness as well)
  if (failCase || shutdownRequested) {
    return notReady("Service is closing");
  }

  const databasesUp = dbConnections.every(client => client.isConnected());
  const serversUp = httpTrackers.length && httpTrackers.every(tracker => tracker.isListening);
  if (!databasesUp) {
    return notReady("Databases not (yet?) connected");
  }
  if (!serversUp) {
    return notReady("HTTP server(s) not ready");
  }
  return res.send("ready");
});
/**
 * Liveness probe. Responds 500 with the error message once an unrecoverable
 * error has been recorded, and 200 "alive" otherwise.
 */
probeRouter.get('/api/probe/live', (req, res) => {
  if (failCase) {
    // There was an unrecoverable error
    return res.status(500).send(`Unrecoverable error: ${failCase.message}`);
  }
  // The healthy case must answer too: without a response the request hangs
  // until the probe times out, which is counted as a failed check
  return res.send("alive");
});
/**
 * Timeout, in seconds, associated with a lost connection.
 * NOTE(review): nothing in the visible part of this file reads it --
 * confirm where it is used.
 */
const connectionLostTimeout = 120; // seconds

/** Handle for the connection-lost timer; unset until a timer is started. */
let connectionLostTimer: ReturnType<typeof setTimeout>;
/********
 * - Make sure you add a MongoClient for each MongoDB database connection to
 *   `dbConnections` so that it is considered as part of the readiness check.
 * - Make sure you do `httpTrackers.push(new ServerTracker(server))` for every
 *   http.Server you are listening on. The vast majority of apps have only
 *   one, but my app has two.
 * - Make sure you add `app.use(probeRouter)` as early as you reasonably can
 *   after creating your express app; that way your liveness and readiness
 *   probes have as little impact on performance as possible, because they
 *   skip the rest of the middleware.
 * - I recommend adding a check in your uncaught exception callback to catch
 *   any unrecoverable errors, such as EADDRINUSE. See below...
 ********/
// Log every uncaught exception; EADDRINUSE is additionally recorded as
// unrecoverable so that the probes start failing.
process.on('uncaughtException', (err) => {
  const errorCode = (err as Error & { code?: unknown })?.code;
  if (errorCode === 'EADDRINUSE') {
    setUnrecoverableError(err);
  }
  console.warn("Uncaught exception: ", err, err.stack);
});
/** Set on the first call to `finishShutdown` so that later calls are no-ops. */
let finishShutdownStarted = false;

/**
 * Final shutdown step: closes the mongoose connection and then gives the
 * process five seconds to exit by itself before it is killed.
 *
 * Only the first call does anything; both the connection watcher and the
 * force-close timeout can end up here. If closing the connection throws,
 * the error is logged and the process exits with code 1.
 */
async function finishShutdown() {
  if (finishShutdownStarted) {
    return;
  }
  finishShutdownStarted = true;

  console.log("Shutting down...");
  try {
    // Close any database connections or similar that you have open
    await mongoose.connection?.close();
    console.log("Application stopped, as long as all running tasks are stopped");
    // At this point it should be shut down within 5 seconds; if it isn't
    // then we just kill it. The timer is unref'd so that it does not itself
    // keep the process alive and turn every clean shutdown into a forced one.
    setTimeout(forceShutdown, 5000).unref();
  } catch (err) {
    console.warn("Error shutting down: ", err);
    process.exit(1);
  }
}
// Begin the graceful shutdown on SIGINT (e.g. Ctrl+C) and on SIGTERM, the
// signal Kubernetes sends by default when it terminates a pod.
process.on('SIGINT', shutdownPhase1);
process.on('SIGTERM', shutdownPhase1);
/**
 * Hard stop used when the graceful shutdown did not complete in time: logs
 * the failure, calls `wtfDump()` and exits with code 1.
 *
 * @param code Not used by this function.
 */
function forceShutdown(code: any) {
  console.log("Failed to shut down gracefully, shutting down hard now");

  // If we got here, dump out what was still keeping the process alive
  // NOTE(review): assumes wtfDump() reports open handles -- confirm
  wtfDump();

  const hardExitCode = 1;
  process.exit(hardExitCode);
  process.abort(); // probably overkill =]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Socket } from 'net'; | |
import http from 'http'; | |
import https from 'https'; | |
/** Either kind of server that can be tracked. */
type WebServer = http.Server | https.Server;

/** Identifier assigned to each tracked socket. */
type ConnID = number;

declare module 'net' {
  // Augment the Socket interface with the tracking metadata that is
  // attached to each connection
  interface Socket {
    /** Identifier assigned to the socket when it is first tracked. */
    $$id: ConnID;
    /** True while no request is being served on the socket. */
    $$idle: boolean;
  }
}
/** The next connection id to hand out. */
let nextId = 0;

/** Returns a fresh connection id; ids are sequential and start at 0. */
function getNextId(): ConnID {
  return nextId++;
}

/**
 * Keeps track of every socket opened on a server, and of whether each one is
 * currently serving a request, so the server can be drained and closed
 * gracefully.
 */
class ServerTracker {
  /** Once true, sockets are destroyed as soon as their response finishes. */
  isShuttingDown = false;
  /** All currently open sockets, keyed by their assigned id. */
  connections = new Map<ConnID, Socket>();

  /**
   * @param server The server whose `connection` and `request` events are
   *   observed.
   */
  constructor(public server: WebServer) {
    server.on('connection', this.onConnection.bind(this));
    server.on('request', this.onRequest.bind(this));
  }

  /**
   * The number of currently open connections (including idle connections)
   */
  get connectionCount() {
    return this.connections.size;
  }

  /**
   * The number of connections which are not idle
   */
  get activeConnectionCount() {
    let active = 0;
    for (const sock of this.connections.values()) {
      if (!sock.$$idle) {
        active += 1;
      }
    }
    return active;
  }

  /** Whether the underlying server is currently listening. */
  get isListening() { return this.server.listening; }

  /**
   * Stops accepting requests, closes any idle (keepalive)
   * connections, and starts closing connections as soon
   * as requests finish
   */
  requestShutdown() {
    this.stopListening();
    // From now on sockets are closed as soon as they become idle
    this.isShuttingDown = true;
    // Close any idle (keepalive) connections right away
    for (const sock of this.connections.values()) {
      if (sock.$$idle) {
        sock.destroy();
      }
    }
  }

  /**
   * Force-closes all remaining connections; this is destructive! Do not
   * do this until you're confident that everything is
   * totally ready to be closed
   */
  forceClose() {
    // Just in case the caller came straight here without requestShutdown()
    this.stopListening();
    this.isShuttingDown = true;
    for (const sock of this.connections.values()) {
      sock.destroy();
    }
  }

  /**
   * Starts tracking a socket the first time it is seen; later calls for the
   * same socket change nothing.
   *
   * @param sock The socket from a `connection` event or from a request.
   * @returns The same socket, now tracked.
   */
  onConnection(sock: Socket) {
    // Compare against undefined rather than testing truthiness: the first
    // id handed out is 0, and treating it as "no id yet" would register that
    // socket a second time and leave a stale entry behind after it closes.
    if (sock.$$id === undefined) {
      sock.$$id = getNextId();
      sock.$$idle = true;
      this.connections.set(sock.$$id, sock);
      sock.on('close', () => {
        this.connections.delete(sock.$$id);
      });
    }
    return sock;
  }

  /**
   * Marks the request's socket as busy until the response has finished.
   * While shutting down, the socket is destroyed at that point instead of
   * being kept alive.
   *
   * @param req The incoming request; its socket is tracked if it is not yet.
   * @param res The response whose `finish` event ends the busy period.
   */
  onRequest(req: http.IncomingMessage, res: http.ServerResponse) {
    const sock = this.onConnection(req.socket);
    sock.$$idle = false;
    res.on('finish', () => {
      sock.$$idle = true;
      if (this.isShuttingDown) {
        // If we're shutting down then close the connection,
        // don't allow keepalive
        sock.destroy();
      }
    });
  }

  /** Stops the server from accepting new connections if it is listening. */
  private stopListening() {
    if (this.server.listening) {
      this.server.close();
    }
  }
}
export default ServerTracker; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment