Skip to content

Instantly share code, notes, and snippets.

@tsibley
Created October 15, 2023 07:43
Show Gist options
  • Save tsibley/15d335630151673721ce32921a3e3042 to your computer and use it in GitHub Desktop.
Save tsibley/15d335630151673721ce32921a3e3042 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
usage: restart-when-stalled <timeout> <cmd>
restart-when-stalled --help
Runs <cmd> via the shell ({SHELL}) and monitors its stdout.
If <timeout> seconds pass without output, the <cmd> process
is stopped and <cmd> is restarted.
The <cmd> process is stopped by sending SIGTERM, waiting up
to 5s for the process to exit, and then sending SIGKILL if
it still hasn't.
"""
import argparse
import asyncio
import re
from inspect import cleandoc
from os import environ
from sys import argv, stdout, stderr, exit
from textwrap import dedent
# Shell used to run <cmd>.  An unset *or empty* $SHELL falls back to bash.
SHELL = environ.get("SHELL", "") or "/bin/bash"

# The module docstring with its {SHELL} placeholder filled in and the common
# leading indentation removed; args() builds the usage and help text from it.
DOC = cleandoc(__doc__.format(SHELL = SHELL))
def _timeout_seconds(value):
    """
    argparse ``type=`` callable: parse *value* as a positive whole number of
    seconds.

    Raises argparse.ArgumentTypeError (reported by argparse as a usage
    error) when *value* is not an integer or is not greater than zero.
    """
    try:
        seconds = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None

    # asyncio.wait_for() with a timeout <= 0 times out immediately, which
    # would make the monitor kill and restart <cmd> in a tight endless loop.
    if seconds <= 0:
        raise argparse.ArgumentTypeError(
            f"timeout must be a positive number of seconds, got {seconds}")

    return seconds


def args():
    """
    Parse the command line (sys.argv) and return ``(timeout, cmd)``.

    The usage line(s) and the help description both come from DOC:
    everything before the first blank line is the usage, everything after
    it is the description.  If DOC contains no blank line, the whole text
    is used as the usage and the description is left empty.

    Exits via argparse (status 2) on invalid arguments, including a
    <timeout> that is not a positive integer.
    """
    # partition() cannot fail the way unpacking split("\n\n", 1) does when
    # the separator is missing.
    usage, _, description = DOC.partition("\n\n")

    p = argparse.ArgumentParser(
        # argparse adds its own "usage: " prefix, so strip ours.
        usage = dedent(re.sub(r"^usage:\s+", "", usage)),
        description = description,
        formatter_class = argparse.RawDescriptionHelpFormatter)

    # Both positionals are already described by the usage/description text,
    # so they are hidden from the generated argument list.
    p.add_argument("timeout", metavar = "<timeout>", type = _timeout_seconds, help = argparse.SUPPRESS)
    p.add_argument("cmd", metavar = "<cmd>", help = argparse.SUPPRESS)

    opts = p.parse_args()
    return opts.timeout, opts.cmd
async def _stop(proc, grace = 5):
    """
    Stop *proc*: send SIGTERM, wait up to *grace* seconds for it to exit,
    then send SIGKILL if it still hasn't.  Does not wait after SIGKILL; the
    caller is expected to await ``proc.wait()`` afterwards.
    """
    proc.terminate()
    try:
        await asyncio.wait_for(proc.wait(), grace)
    except asyncio.TimeoutError:
        proc.kill()


def _exit_status(returncode):
    """
    Convert a subprocess returncode to a shell-style exit status.

    returncode is -N if the process exited due to signal N; that is
    converted to the bash-like 128 + N.  Other values pass through.
    """
    return 128 - returncode if returncode < 0 else returncode


async def monitor(timeout: int, cmd: str):
    """
    Run *cmd* via SHELL, copying its stdout to our stdout, and restart it
    whenever *timeout* seconds pass without any output.

    Each (re)start is announced with a ``*** <cmd>`` line on stdout; each
    detected stall is announced on stderr.  The loop ends the first time
    the command reaches end-of-output without having stalled.

    Returns the exit status of that last invocation, with death-by-signal
    reported as 128 + N.
    """
    while True:
        # Child output below is written straight to stdout.buffer, bypassing
        # the text layer.  Flush the banner now so it can't be held back in
        # the text layer's buffer and come out after the child's output
        # (which happens when stdout is a pipe or file rather than a TTY).
        print(f"*** {cmd}", flush = True)

        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdout = asyncio.subprocess.PIPE,
            executable = SHELL)

        stalled = False

        while True:
            try:
                output = await asyncio.wait_for(proc.stdout.read(1024), timeout)
            except asyncio.TimeoutError:
                print("\n\n*** STALL DETECTED… terminating and restarting ***\n\n", file = stderr)
                stalled = True
                await _stop(proc)
                break

            if not output:
                # EOF
                break

            # Pass the bytes through untouched and unbuffered so that
            # downstream consumers see output as soon as the child emits it.
            stdout.buffer.write(output)
            stdout.buffer.flush()

        # Reap the process (also covers the SIGKILL path in _stop) so that
        # returncode is populated.
        await proc.wait()

        if not stalled:
            break

    # We're done, exit with the same code as the last process invocation.
    return _exit_status(proc.returncode)
if __name__ == "__main__":
    # Parse <timeout> and <cmd>, run the monitor to completion, and exit
    # with the status it reports for the last invocation of <cmd>.
    timeout, cmd = args()
    status = asyncio.run(monitor(timeout, cmd))
    exit(status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment