Skip to content

Instantly share code, notes, and snippets.

@stralex7
Last active January 30, 2018 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stralex7/69264fead1fb387ff3873206a3a6ec3d to your computer and use it in GitHub Desktop.
Save stralex7/69264fead1fb387ff3873206a3a6ec3d to your computer and use it in GitHub Desktop.
Monitor ethminer and restart it on CUDA errors or when it stops responding
#!/usr/bin/python3
import os
import signal
import subprocess
import sys
import time
# Seconds of silence on the miner's output after which it is considered hung.
# Overridable through the TIMEOUT_NO_ACTIVITY_SECONDS environment variable;
# a value that is not a valid integer falls back to the default of 60.
try:
    TIMEOUT_NO_ACTIVITY_SECONDS = int(os.getenv('TIMEOUT_NO_ACTIVITY_SECONDS', 60))
except ValueError:
    # Only a malformed number is expected here; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit.
    TIMEOUT_NO_ACTIVITY_SECONDS = 60
class MinerException(Exception):
    """Raised when the miner's output shows a condition that requires a restart."""
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when the miner has produced no output in time."""
def timeout_handler(signum, frame):
    """Signal handler that turns an expired alarm into a TimeoutException.

    ``signum`` and ``frame`` are the standard signal-handler arguments and
    are not used.
    """
    message = "No activity from ethminer for {} seconds".format(
        TIMEOUT_NO_ACTIVITY_SECONDS
    )
    raise TimeoutException(message)
def _stop_miner(proc):
    """Kill the miner process and reap it, waiting at most 15 seconds."""
    proc.kill()
    proc.stdout.close()
    try:
        proc.wait(timeout=15)
    except subprocess.TimeoutExpired:
        print("Miner didn't shutdown within 15 seconds")
        proc.kill()


def execute(cmd, job_timeout_seconds=120):
    """Run the miner command and restart it whenever it misbehaves.

    The miner's combined stdout/stderr is echoed line by line. The process
    is killed and started again when:

    * no output line arrives for TIMEOUT_NO_ACTIVITY_SECONDS (SIGALRM),
    * no 'Received new job' line has been seen for more than
      ``job_timeout_seconds``,
    * a line starts with 'Could not resolve host' or 'CUDA error',
    * the miner's output stream ends.

    A KeyboardInterrupt stops the miner and ends the loop.

    Args:
        cmd: Argument list passed to ``subprocess.Popen``.
        job_timeout_seconds: Maximum age, in seconds, of the last
            'Received new job' line before the miner is restarted.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    shutdown = False
    while not shutdown:
        proc = subprocess.Popen(cmd,
                                bufsize=0,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # Start the job clock afresh for every new process; otherwise time
        # that elapsed under the previous process counts against this one
        # and can trigger an immediate, spurious restart.
        last_job_time = time.time()
        try:
            try:
                signal.alarm(TIMEOUT_NO_ACTIVITY_SECONDS)
                for line in iter(proc.stdout.readline, ""):
                    line = line.strip()
                    job_time_delta = time.time() - last_job_time
                    print(line)
                    if job_time_delta > job_timeout_seconds:
                        raise MinerException('****** Restarting due to not getting new jobs')
                    if 'Received new job' in line:
                        # time.time() deltas are in seconds, not milliseconds.
                        print("Time since last job: %f seconds" % job_time_delta)
                        last_job_time = time.time()
                    if line.startswith('Could not resolve host'):
                        raise MinerException('****** Restarting due to DNS error')
                    if line.startswith('CUDA error'):
                        raise MinerException('****** Restarting due to CUDA error')
                    # Each received line re-arms the inactivity alarm.
                    signal.alarm(TIMEOUT_NO_ACTIVITY_SECONDS)
            finally:
                # Disarm inside the guarded region so that an alarm firing
                # right here is still caught by the handlers below.
                signal.alarm(0)
        except (MinerException, TimeoutException) as e:
            print('\n\n', str(e), '\n\n')
        except KeyboardInterrupt:
            shutdown = True
        finally:
            # Always reap the child, even if an unexpected error propagates.
            _stop_miner(proc)
if __name__ == "__main__":
    # Everything after the script name is the miner command line to supervise.
    miner_cmd = sys.argv[1:]
    if not miner_cmd:
        # Without a command Popen would fail with an unhelpful traceback.
        sys.exit("usage: {} <miner command> [args...]".format(sys.argv[0]))
    execute(miner_cmd)
@stralex7
Copy link
Author

I have a problem with ethminer where it keeps running without an error but has no connection to ethermine.org. I'm trying to restart the process (actually kill it) when I see no new jobs from the pool for more than 120 seconds. I'm running this on a rig with a single 1070 card, so in theory you might want to reduce this even further, to 60 seconds.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment