Skip to content

Instantly share code, notes, and snippets.

@jdx
Last active March 16, 2023 18:08
Show Gist options
  • Save jdx/0f535be1ada0ea964cae to your computer and use it in GitHub Desktop.
Save jdx/0f535be1ada0ea964cae to your computer and use it in GitHub Desktop.
zero-downtime node.js app runner
// This script will boot app.js with the number of workers
// specified in WORKER_COUNT.
//
// The master will respond to SIGHUP, which will trigger
// restarting all the workers and reloading the app.
var cluster = require('cluster');

// Number of workers to keep alive. Environment variables are always strings,
// so WORKER_COUNT is converted to a number up front. A value that is not a
// finite, non-negative number (e.g. "abc", "-1", "Infinity") would otherwise
// either fork no workers at all or never stop forking, so it falls back to
// the default of 2 with a warning.
var workerCount = Number(process.env.WORKER_COUNT || 2);
if (!isFinite(workerCount) || workerCount < 0) {
  console.warn('invalid WORKER_COUNT', process.env.WORKER_COUNT, '- using 2');
  workerCount = 2;
}

// Defines what each worker needs to run.
// In this case, it's app.js, a simple node http app.
cluster.setupMaster({ exec: 'app.js' });
// Returns how many workers are currently registered in cluster.workers.
function numWorkers() {
  var workerIds = Object.keys(cluster.workers);
  return workerIds.length;
}
// Set to true by stopAllWorkers (the SIGTERM handler). forkNewWorkers checks
// it so that workers disconnecting during shutdown are not replaced.
var stopping = false;
// Tops the pool back up to workerCount by forking one worker for every
// missing slot. Does nothing once the server has started stopping.
function forkNewWorkers() {
  if (stopping) {
    return;
  }
  for (var active = numWorkers(); active < workerCount; active += 1) {
    cluster.fork();
  }
}
// Ids (keys of cluster.workers) of the workers queued for a restart.
// Filled by the SIGHUP handler and drained one id at a time, from the end,
// by stopNextWorker.
var workersToStop = [];
// Stops a single worker.
//
// The worker is asked to disconnect first. If it has still not exited
// 60 seconds later, worker.kill() is called (the original gist describes
// this as sending SIGTERM). The pending kill is cancelled as soon as the
// worker exits, so kill() is never called on a worker that is already gone.
//
// worker: a cluster Worker; must expose process.pid, disconnect(), kill()
// and once().
function stopWorker(worker) {
  console.log('stopping', worker.process.pid);
  worker.disconnect();
  var killTimer = setTimeout(function() {
    worker.kill();
  }, 60000);
  // Ensure we don't stay up just for this setTimeout
  killTimer.unref();
  // The worker went away on its own: the fallback kill is no longer needed.
  worker.once('exit', function() {
    clearTimeout(killTimer);
  });
}
// Takes the last id off workersToStop and, if a worker with that id is
// still registered in cluster.workers, hands it to stopWorker so it can
// finish its work before being stopped. Does nothing when the queue is
// empty or the worker no longer exists.
function stopNextWorker() {
  var nextId = workersToStop.pop();
  var candidate = cluster.workers[nextId];
  if (!candidate) {
    return;
  }
  stopWorker(candidate);
}
// Stops all the workers at once and marks the server as stopping, so that
// no replacement workers are forked afterwards.
function stopAllWorkers() {
  stopping = true;
  console.log('stopping all workers');
  Object.keys(cluster.workers).forEach(function(workerId) {
    stopWorker(cluster.workers[workerId]);
  });
}
// A worker is now listening on a port.
// Once it is ready, we can signal the next queued worker to restart.
cluster.on('listening', stopNextWorker);

// A worker has gone away, either because its process was killed or because
// we are working through the workersToStop array restarting each process.
// In either case, fork any workers needed to get back to workerCount.
//
// Both events are handled because Node does not guarantee whether
// 'disconnect' or 'exit' is emitted first, and a worker is only guaranteed
// to be gone from cluster.workers by the time the last of the two fires.
// Listening to only one of them can leave a dead worker counted as active
// and never replaced. forkNewWorkers only fills missing slots, so running
// it for both events never over-forks.
cluster.on('disconnect', forkNewWorkers);
cluster.on('exit', forkNewWorkers);

// HUP signal sent to the master process: restart all the workers
// sequentially. Each later worker is stopped when a replacement reports
// 'listening'.
process.on('SIGHUP', function() {
  console.log('restarting all workers');
  workersToStop = Object.keys(cluster.workers);
  stopNextWorker();
});

// TERM signal sent to the master process: stop all the workers at once.
process.on('SIGTERM', stopAllWorkers);

// Fork off the initial workers
forkNewWorkers();
console.log('app master', process.pid, 'booted');
@thelinuxlich
Copy link

Workers are not receiving message on disconnect, what can it be?

@leonardjaviniar
Copy link

Starting and stopping about 3–5 times causes the error below when connecting to a database:

events.js:72  
     throw er; // Unhandled 'error' event
              ^
Error: write ENOTSUP - cannot write to IPC channel.  
    at errnoException (child_process.js:1001:11)  
    at ChildProcess.target.send (child_process.js:465:16)  
    at Worker.send (cluster.js:406:21)  
    at sendInternalMessage (cluster.js:399:10)  
    at handleResponse (cluster.js:177:5)  
    at respond (cluster.js:192:5)  
    at Object.messageHandler.queryServer (cluster.js:247:5)  
    at handleMessage (cluster.js:197:32)  
    at ChildProcess.emit (events.js:117:20)  
    at handleMessage (child_process.js:322:10)  

I am using MSSQL database connector for Node.js

@amit-handa
Copy link

I am facing an issue with the script.
Scenario: restarting workers.
With 2 workers (say), it stops the first worker. cluster.on('disconnect', ...) is called immediately (before worker.kill() is called through the timer). This is a bug.
Ideally, that function should be scheduled on cluster.on('exit', ...).
I hope I am correct. Doing this makes your script fly. I would appreciate your input.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment