Created
March 2, 2012 21:30
-
-
Save andreasf/1961533 to your computer and use it in GitHub Desktop.
Python daemon that automatically stores Intel GPU error states
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import subprocess | |
import shlex | |
import sqlite3 | |
import datetime | |
import web | |
import hashlib | |
import threading | |
import Queue | |
# --- Paths -----------------------------------------------------------------
# Log file that Monitor follows with "tail -f".
SYSLOG_FILE = '/var/log/messages'
# SQLite database file opened by SqliteThread.
DB_FILE = '/var/log/syslog_events.sqlite3'
# Absolute path of the tail binary used to follow SYSLOG_FILE.
TAIL = '/usr/bin/tail'
# Files whose contents are appended to the payload of a "GPU hung" event.
ERR_STATE_FILES = (
    '/sys/kernel/debug/dri/0/i915_error_state',
    '/sys/kernel/debug/dri/64/i915_error_state',
)

# --- DbJob type tags (looked up by SqliteThread.run) -----------------------
INSERT_EVENT = "i"
QUERY_EVENT = "j"

# --- Paging ----------------------------------------------------------------
# Number of events that make up one result page of a query job.
ITEMS_PER_PAGE = 10
class Monitor(threading.Thread):
    """Follow the syslog and queue a database job for each GPU hang seen.

    Constructing a Monitor spawns ``tail -f`` on SYSLOG_FILE.  ``run`` then
    reads the child's output line by line and puts an INSERT_EVENT job on
    ``SqliteThread.q`` for every line that ``_dispatch`` recognises.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.syslog = self._create_pipe()
        # Non-daemon thread: the interpreter does not exit while it runs.
        self.daemon = False

    def _create_pipe(self):
        """Start ``tail -f`` on SYSLOG_FILE and return the subprocess.Popen.

        The child's stdout is a pipe; ``run`` reads from it.
        """
        # Passing an argument list (rather than shlex-splitting a
        # concatenated string) keeps a path with whitespace in one piece.
        return subprocess.Popen([TAIL, '-f', SYSLOG_FILE],
                                stdout=subprocess.PIPE)

    def _gpu_hung_event(self, message):
        """Build the ``("gpu_hung", payload)`` pair for a GPU-hang line.

        The payload is the syslog line followed by the contents of every
        file in ERR_STATE_FILES.  A file that cannot be read is noted in
        the payload instead of raising, so one missing or unreadable entry
        does not terminate the monitor thread.
        """
        parts = ["syslog:\n", message, "\n\n"]
        for path in ERR_STATE_FILES:
            parts.append(path + ":\n")
            try:
                # "with" closes the file even if read() fails.
                with open(path, 'r') as state_file:
                    parts.append(state_file.read())
            except (IOError, OSError) as err:
                parts.append("<unreadable: %s>" % err)
            parts.append("\n\n")
        return ("gpu_hung", "".join(parts))

    def _dispatch(self, message):
        """Map a syslog line to an ``(event, payload)`` pair.

        Returns ``(None, None)`` when the line is not a recognised event.
        """
        if "GPU hung" in message:
            return self._gpu_hung_event(message)
        return (None, None)

    def _handle(self, message):
        """Queue an INSERT_EVENT job if *message* is a recognised event."""
        # Timestamp is taken before dispatching, i.e. before the error
        # state files are read.
        now = datetime.datetime.now()
        (event, payload) = self._dispatch(message)
        if event is not None:
            SqliteThread.q.put(DbJob(INSERT_EVENT, event=event,
                                     payload=payload, ts=now))

    def run(self):
        """Pass every line from the tail pipe to ``_handle`` until EOF.

        Each line is handled exactly once, including the first one; the
        empty read that signals end of file is not handled.
        """
        readline = self.syslog.stdout.readline
        while True:
            line = readline()
            if not line:
                # readline() returns an empty value at end of file.
                break
            self._handle(line)
class SqliteThread(threading.Thread):
    """Worker thread that owns the SQLite connection and serves DbJob requests.

    Producers put DbJob instances on the class-level queue ``q``; ``run``
    takes them off one at a time and calls the handler matching the job's
    ``type`` (INSERT_EVENT or QUERY_EVENT).
    """

    # Shared job queue; every producer uses SqliteThread.q.
    q = Queue.Queue()

    def __init__(self):
        threading.Thread.__init__(self)
        self.daemon = True
        # The connection is opened in run(), i.e. on the worker thread:
        # by default a sqlite3 connection may only be used by the thread
        # that created it.
        self.db = None

    def _init_db(self):
        """Open DB_FILE, create the ``events`` table if missing, return the connection."""
        # TODO syslog rotation?
        db = sqlite3.connect(DB_FILE)
        # The UNIQUE constraint on "hash" already gives that column a
        # unique index, so no separate "create unique index" is needed.
        db.cursor().executescript("""
            create table if not exists events(
                id integer primary key autoincrement,
                hash binary(8) unique,
                ts datetime,
                event char(32),
                payload text
            );
        """)
        return db

    def _insert_event(self, dbjob):
        """Store the event carried by *dbjob*; a job with a falsy event is ignored.

        Reads ``dbjob.ts``, ``dbjob.event`` and ``dbjob.payload``.  The MD5
        digest of the payload is written to the unique ``hash`` column, so
        ``insert or ignore`` drops an event whose payload is already stored.
        """
        if not dbjob.event:
            return
        payload = dbjob.payload
        # hashlib works on bytes; encode text payloads explicitly instead of
        # relying on an implicit (ASCII) conversion.
        raw = payload if isinstance(payload, bytes) else payload.encode("utf-8")
        # sqlite3.Binary is the portable spelling of the BLOB wrapper.
        digest = sqlite3.Binary(hashlib.md5(raw).digest())
        self.db.cursor().execute("""
            insert or ignore into events (ts, hash, event, payload)
            values (?, ?, ?, ?)
        """, (dbjob.ts, digest, dbjob.event, payload))
        self.db.commit()

    def _query_event(self, dbjob):
        """Put one page of events, newest first, on ``dbjob.queue``.

        ``dbjob.page`` is the zero-based page number.  The result is a list
        of at most ITEMS_PER_PAGE Event objects.
        """
        offset = int(dbjob.page) * ITEMS_PER_PAGE
        cur = self.db.cursor()
        # The limit is bound from ITEMS_PER_PAGE so that page size and
        # offset can never disagree.
        cur.execute("""
            select ts, event, payload from events
            order by ts desc limit ? offset ?
        """, (ITEMS_PER_PAGE, offset))
        dbjob.queue.put([Event(*row) for row in cur.fetchall()])

    def run(self):
        """Open the database, then process jobs from ``q`` forever."""
        self.db = self._init_db()
        handlers = {
            INSERT_EVENT: self._insert_event,
            QUERY_EVENT: self._query_event,
        }
        while True:
            item = SqliteThread.q.get()
            try:
                # Jobs of an unknown type are skipped.  Looking the handler
                # up with get() keeps a KeyError raised *inside* a handler
                # from being mistaken for an unknown type.
                handler = handlers.get(item.type)
                if handler is not None:
                    handler(item)
            finally:
                # Always balance the get(), even if the handler raised.
                SqliteThread.q.task_done()
class Event:
    """One stored event: when it was recorded, its kind, and its payload text."""

    def __init__(self, timestamp, event, payload):
        # Plain record: keep the three constructor arguments as attributes.
        self.timestamp, self.event, self.payload = timestamp, event, payload
class DbJob:
    """A request for the database thread: a type tag plus arbitrary fields.

    Every keyword argument becomes an instance attribute of the same name.
    """

    def __init__(self, type, **kwargs):
        self.type = type
        # Copy all keyword arguments onto the instance in one step.
        vars(self).update(kwargs)
def start_web():
    """Run the web.py application; every URL path is routed to index_view."""
    # web.py resolves the handler class name through the mapping passed as
    # the second argument, here this module's globals.
    routes = ('/(.*)', 'index_view')
    web.application(routes, globals()).run()
class index_view:
    """web.py handler that renders the most recent events as an HTML page."""

    def GET(self, args):
        """Return the HTML page for the first page of events.

        *args* is the URL remainder captured by the route; it is not used,
        so page 0 is always requested.
        """
        # Per-request reply queue: the database thread puts the result
        # list on it and this request blocks until it arrives.
        self.q = Queue.Queue()
        SqliteThread.q.put(DbJob(QUERY_EVENT, page=0, queue=self.q))
        results = self.q.get()
        self.q.task_done()
        return self.render(results)

    @staticmethod
    def _escape(value):
        """Return *value* as text with ``&``, ``<`` and ``>`` HTML-escaped."""
        text = "%s" % (value,)
        # "&" must be replaced first so the entities added below survive.
        return (text.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;"))

    def render(self, results):
        """Build the HTML document listing *results*.

        Each item must provide ``timestamp``, ``event`` and ``payload``.
        All three are escaped before being placed in the markup, because
        the payload is raw log and error-state text that may contain
        ``<`` or ``&``.
        """
        escape = self._escape
        items = [
            "<li><h2>%s: %s</h2><pre>%s</pre></li>" % (
                escape(item.timestamp), escape(item.event),
                escape(item.payload))
            for item in results
        ]
        # "%%" in the stylesheet is a literal percent sign; the template is
        # filled in with the % operator.
        return """
        <!doctype html>
        <html>
        <head>
        <title>syslog events</title>
        <style>
        h2 {font-size: 100%%; }
        li { list-style: none; }
        ul { padding: 0px; }
        </style>
        <meta charset="utf-8">
        </head>
        <body>
        <h1>Syslog events</h1>
        <ul>
        %s
        </ul>
        </body>
        </html>
        """ % ("".join(items))
def main():
    """Start the database and monitor threads, then run the web server."""
    # Both objects are constructed before either thread is started, and the
    # database worker is started first.
    database_worker = SqliteThread()
    syslog_monitor = Monitor()
    for worker in (database_worker, syslog_monitor):
        worker.start()
    start_web()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment