Skip to content

Instantly share code, notes, and snippets.

@MostAwesomeDude
Created February 1, 2017 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MostAwesomeDude/2fed99e5cf630639df42df7997e12d1e to your computer and use it in GitHub Desktop.
Save MostAwesomeDude/2fed99e5cf630639df42df7997e12d1e to your computer and use it in GitHub Desktop.
#!/usr/bin/env nix-shell
#! nix-shell -i python -p pythonPackages.treq pythonPackages.twisted pythonPackages.prometheus_client
import os
import time
from twisted.internet import reactor
from twisted.internet.task import LoopingCall, deferLater, react
from twisted.web.client import Agent
from twisted.web.server import Site
from twisted.web.resource import Resource
from treq import post
from prometheus_client import Gauge, Histogram
from prometheus_client.twisted import MetricsResource
# Passed to the shared Agent below as its ``connectTimeout`` (seconds).
CONNECT_TIMEOUT = 7

# Read once at import time; a missing VERIFIER_URI raises KeyError here.
# checkTahoe appends this value to the gateway's ``/uri/`` path.
URI = os.environ["VERIFIER_URI"]

# Per-(deep, repair) bookkeeping about the checks that have been run.
LAST_CHECK = Gauge(
    "verifier_last_check",
    "Moment of last check (timestamp)",
    labelnames=("deep", "repair"),
)
CHECK_LATENCY = Histogram(
    "verifier_check_latency",
    "Check latency (seconds)",
    labelnames=("deep", "repair"),
)

# Set to 1 after a successful check and to 0 when a check fails.
AVAILABLE = Gauge(
    "verifier_available",
    "Whether the grid is available (bool)",
)

# Share counts from the most recent check, split by the ``type`` label.
SHARES = Gauge("verifier_shares", "Number of Shares", labelnames=("type",))

# One HTTP agent shared by every check request.
agent = Agent(reactor, connectTimeout=CONNECT_TIMEOUT)
def checkTahoe(repair):
    """
    Run one check of ``URI`` against the local gateway and record metrics.

    A ``t=check`` POST is sent to ``http://localhost:3456/uri/<URI>`` asking
    for JSON output.  On success the share counts, the last-check timestamp,
    the availability flag and the check latency are updated.  If a plain
    (non-repair) check reports the object as unhealthy, a second pass with
    ``repair=True`` is scheduled and chained onto the returned Deferred.

    :param repair: when true, ask the gateway to repair as well
        (``&repair=true``) and read the ``post-repair-results`` section of
        the response instead of ``results``.
    :return: a Deferred that fires once the check (and any follow-up repair
        pass) has finished.  Failures are consumed by the errback below, so
        the Deferred never fires with a Failure.
    """
    # To try to reduce coordinated omission, we start tracking time
    # immediately and record it as late as possible.
    before = time.time()

    # XXX urlparse
    params = "?t=check&output=json"
    if repair:
        params += "&repair=true"
    d = post("http://localhost:3456/uri/" + URI + params, agent=agent)

    @d.addCallback
    def jsonify(response):
        # Decode the response body; yields the parsed JSON document.
        return response.json()

    @d.addCallback
    def measure(body):
        # A repairing check nests its final state under a different key.
        if repair:
            results = body["post-repair-results"]
        else:
            results = body["results"]
        corruptShares = results["count-corrupt-shares"]
        wrongShares = results["count-wrong-shares"]
        healthy = results["healthy"]
        # recoverable = results["recoverable"]

        rv = None
        if not repair and not healthy:
            # deferLater takes (clock, delay, callable, ...).  The delay must
            # be given explicitly; without it the function itself is taken as
            # the delay and the repair pass is never scheduled.
            rv = deferLater(reactor, 0, checkTahoe, repair=True)

        SHARES.labels(type="corrupt").set(corruptShares)
        SHARES.labels(type="wrong").set(wrongShares)
        LAST_CHECK.labels(deep=False, repair=repair).set_to_current_time()
        AVAILABLE.set(1)

        # As late as possible.
        after = time.time()
        CHECK_LATENCY.labels(deep=False, repair=repair).observe(after - before)
        # Returning the repair Deferred (if any) makes ``d`` wait for it.
        return rv

    @d.addErrback
    def fail(failure):
        # Any error above (request, decoding, missing key) counts as the
        # grid being unavailable.  Returning None marks the failure as
        # handled, so the caller's Deferred still fires successfully.
        AVAILABLE.set(0)

    return d
def main(reactor):
    """
    Serve the metrics endpoint and start the periodic check loop.

    Publishes ``MetricsResource`` at ``/metrics`` on TCP port 8000, then
    calls ``checkTahoe(repair=False)`` immediately and every 60 seconds
    afterwards.

    :param reactor: the reactor used to open the listening port.
    :return: the Deferred returned by ``LoopingCall.start``.
    """
    root = Resource()
    root.putChild(b'metrics', MetricsResource())
    factory = Site(root)
    port = reactor.listenTCP(8000, factory)
    # Single-argument print: same output as the old print statement on
    # Python 2, and also valid syntax on Python 3.
    print("Listening on port %s" % (port.getHost(),))
    lc = LoopingCall(checkTahoe, repair=False)
    return lc.start(60, now=True)
# Only start the reactor when executed as a script, so that importing this
# module does not block inside the event loop.
if __name__ == "__main__":
    react(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment