Skip to content

Instantly share code, notes, and snippets.

@fnordahl
Last active July 8, 2019 08:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fnordahl/2718e3369351faea919d4646018f9bde to your computer and use it in GitHub Desktop.
Save fnordahl/2718e3369351faea919d4646018f9bde to your computer and use it in GitHub Desktop.
parse-rgw.py
#!/usr/bin/env python3
import collections
import sys
_Logline = collections.namedtuple(
    'Logline', ['date', 'time', 'connptr', 'code', 'txt'])


def _group_requests(lines):
    """Yield one list of Logline records per request found in *lines*.

    A request begins at a line whose text contains 'starting new request'
    and collects every following line that carries the same pointer, until
    that pointer starts another request or the input ends.
    """
    in_flight = {}
    field_count = len(_Logline._fields)
    for line in lines:
        fields = line.split(None, field_count - 1)
        if len(fields) != field_count:
            # Blank or truncated line: it cannot be attributed to any
            # request, so skip it instead of failing on the unpacking.
            continue
        data = _Logline(*fields)
        if 'starting new request' in data.txt:
            # The pointer is being re-used, so whatever was collected under
            # it is complete.  Popping (rather than overwriting in place)
            # also moves the key to the end of the dict, which keeps the
            # final flush ordered by most recent request start.
            finished = in_flight.pop(data.connptr, None)
            if finished is not None:
                yield finished
            in_flight[data.connptr] = [data]
        elif data.connptr in in_flight:
            in_flight[data.connptr].append(data)
        # Otherwise: there may be unreferenced log lines at the beginning
        # of the file because of log rotation, skip them.
    # Flush out the requests that were still open at end of input.
    yield from in_flight.values()


def main():
    """Parse RadosGW civetweb log and group per connection.

    The log is read from standard input.  Each group is printed to standard
    output as a list of Logline tuples, one list per line.

    The RadosGW civetweb log file is a funny chap. Lines for start and done
    requests reference a pointer to a RGWRequest object while lines with the
    URL of the request reference a pointer to a mg_connection struct.

    They do have an intermediate common identifier which is the pointer to
    the thread object serving the request, BUT, that identifier is only
    common between the start of a request and beginning of the next request
    served by that thread.

    After that all the pointer addresses will be re-used in different
    constellations.

    Lines with fewer than five whitespace-separated fields are ignored.
    """
    # Iterate stdin lazily so large log files are not held in memory.
    for request in _group_requests(sys.stdin):
        print(request)
# Script entry point: run the parser only when executed directly, and hand
# main()'s return value to the interpreter as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment