Skip to content

Instantly share code, notes, and snippets.

@vfreex
Last active October 18, 2021 13:29
Show Gist options
  • Save vfreex/ad1981e368d7077158aa6b8b8e074f6b to your computer and use it in GitHub Desktop.
Save vfreex/ad1981e368d7077158aa6b8b8e074f6b to your computer and use it in GitHub Desktop.
import asyncio
from os import wait
import re
from io import StringIO
import sys
from typing import Dict, List, Optional, TextIO
import aiohttp
import koji
from datetime import datetime, timedelta
def parse_record_log(file: TextIO) -> Dict[str, List[Dict[str, Optional[str]]]]:
    """
    Parse record.log from Doozer into a dict.

    Each non-blank line has the form ``type|key=value|key=value|...``.
    The returned dict is keyed by the record type (the first ``|``-separated
    field). Each value is a list with one dict per line of that type, mapping
    attribute names to their values.

    An attribute written without ``=`` maps to ``None``; an attribute written
    as ``key=`` maps to the empty string. Entries with an empty key (for
    example the one produced by a trailing ``|``) are dropped. Blank lines are
    skipped.

    :param file: Text stream to read record lines from.
    :return: Mapping of record type to the list of parsed records.
    """
    result: Dict[str, List[Dict[str, Optional[str]]]] = {}
    for raw_line in file:
        line = raw_line.rstrip()
        if not line:
            # A blank line carries no record; without this guard it would be
            # stored as an empty record under the "" type.
            continue
        record_type, *entries = line.split("|")
        record: Dict[str, Optional[str]] = {}
        for entry in entries:
            # Split on the first "=" only, so values may themselves contain "=".
            key, separator, value = entry.partition("=")
            if not key:
                continue
            record[key] = value if separator else None
        result.setdefault(record_type, []).append(record)
    return result
async def get_record_log(build_id: int) -> str:
    """
    Download ``doozer_working/record.log`` from the artifacts of a build/ocp4 job run.

    :param build_id: Number of the job run whose record.log should be fetched.
    :return: The body of record.log as text.
    :raises aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    url = f"https://saml.buildvm.openshift.eng.bos.redhat.com:8888/job/aos-cd-builds/job/build%252Focp4/{build_id}/artifact/doozer_working/record.log"
    async with aiohttp.ClientSession() as session:
        # NOTE(review): ssl=False turns off TLS certificate verification for
        # this request; presumably the host uses an internal CA - confirm.
        # Using the response as a context manager guarantees the connection is
        # released on every exit path, including when raise_for_status() fails.
        async with session.get(url, ssl=False) as resp:
            resp.raise_for_status()
            return await resp.text()
# Timestamp expected at the very start of an orchestrator.log line,
# in the "%Y-%m-%d %H:%M:%S,%f" layout.
_CACHITO_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d+ \d+:\d+:\d+,\d+)")
_CACHITO_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S,%f"
_CACHITO_REQUEST_ID_RE = re.compile(r"request (\d+)")

# Ordered (markers, result key) pairs. The first pair with a marker found in a
# line decides which timestamp that line provides.
_CACHITO_EVENTS = (
    (("cachito - DEBUG - Making request",), "request_start_time"),
    (("cachito - DEBUG - Cachito response",), "request_end_time"),
    (("cachito - INFO - Waiting for request",), "wait_start_time"),
    (("cachito - DEBUG - Request", "cachito - ERROR - Request"), "wait_end_time"),
    (("cachito - DEBUG - Downloading",), "download_start_time"),
    (("cachito - DEBUG - Sources bundle",), "download_end_time"),
)


def _parse_cachito_log(text: str, task_id: int) -> dict:
    """Extract cachito timestamps and the request id from orchestrator.log text."""
    result = {"task_id": task_id}
    for line in text.splitlines():
        key = next(
            (
                event_key
                for markers, event_key in _CACHITO_EVENTS
                if any(marker in line for marker in markers)
            ),
            None,
        )
        if key is None:
            continue
        stamp = _CACHITO_TIMESTAMP_RE.match(line)
        if stamp is None:
            # The marker appears on a line that does not start with a
            # timestamp; there is nothing to record for it.
            continue
        # A later line for the same event overwrites an earlier one.
        result[key] = datetime.strptime(stamp[0], _CACHITO_TIMESTAMP_FORMAT)
        if key == "wait_start_time":
            request = _CACHITO_REQUEST_ID_RE.search(line)
            if request is not None:
                result["request_id"] = int(request[1])
        elif key == "download_end_time":
            # The end of the download is the last event of interest.
            break
    return result


async def get_cachito_metrics(koji_api: koji.ClientSession, task_id: int = 39706701) -> dict:
    """
    Download a task's orchestrator.log and collect its cachito timing data.

    :param koji_api: Koji client session. Currently unused; kept so existing
        callers keep working.
    :param task_id: Id of the task whose orchestrator.log is analyzed.
    :return: Dict that always contains ``task_id`` and, for each event found in
        the log, a ``datetime`` under ``request_start_time``,
        ``request_end_time``, ``wait_start_time``, ``wait_end_time``,
        ``download_start_time`` or ``download_end_time``, plus an integer
        ``request_id`` when the wait line carries one.
    :raises aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    url = f"http://download.eng.bos.redhat.com/brewroot/work/tasks/{task_id % 10000}/{task_id}/orchestrator.log"
    async with aiohttp.ClientSession() as session:
        # Using the response as a context manager releases the connection on
        # every exit path, including when raise_for_status() fails.
        async with session.get(url, ssl=False) as resp:
            resp.raise_for_status()
            text = await resp.text()
    return _parse_cachito_log(text, task_id)
def _elapsed_seconds(metrics: dict, start_key: str, end_key: str):
    """Return the seconds between two recorded timestamps, or '' if either one is missing."""
    start = metrics.get(start_key)
    end = metrics.get(end_key)
    if start is None or end is None:
        return ""
    return (end - start).total_seconds()


async def main():
    """
    Print a comma-separated cachito timing report for one build job run.

    The run number is read from ``sys.argv[1]``. Its record.log is downloaded
    and parsed, the orchestrator.log of every recorded build task is analyzed
    concurrently, and one row per build is written to stdout. Progress and
    warnings go to stderr.

    :raises SystemExit: If no run number was given on the command line.
    """
    if len(sys.argv) < 2:
        raise SystemExit(f"Usage: {sys.argv[0]} <build_id>")
    koji_api = koji.ClientSession("https://brewhub.engineering.redhat.com/brewhub")
    record_log_content = await get_record_log(int(sys.argv[1]))
    record_log = parse_record_log(StringIO(record_log_content))

    # Group the build records by distgit key so rows for the same component
    # come out next to each other, in order of first appearance.
    builds_by_distgit: Dict[Optional[str], List[Dict[str, Optional[str]]]] = {}
    for build in record_log.get("build", []):
        builds_by_distgit.setdefault(build["distgit"], []).append(build)

    analyzed = []  # (task id, build record) pairs, parallel to metrics_futs
    metrics_futs = []
    for dg_key, dg_builds in builds_by_distgit.items():
        for build in dg_builds:
            print(f"Analyzing {dg_key} build {build.get('nvrs')}", file=sys.stderr)
            try:
                tid = int(build["task_id"])
            except (KeyError, TypeError, ValueError):
                # NOTE(review): presumably records of builds that never got a
                # task carry a placeholder instead of a number - confirm.
                print(
                    f"Skipping {dg_key} build {build.get('nvrs')}: no numeric task id ({build.get('task_id')!r})",
                    file=sys.stderr,
                )
                continue
            analyzed.append((tid, build))
            metrics_futs.append(asyncio.ensure_future(get_cachito_metrics(koji_api, tid)))

    print("tid, status, dg_key, nvr, request_id, request_start_time, request_end_time, wait_start_time, wait_end_time, download_start_time, download_end_time, request_time, wait_time, download_time, total_time")
    all_metrics = await asyncio.gather(*metrics_futs)
    for (tid, build), metrics in zip(analyzed, all_metrics):
        row = [
            tid,
            build["status"],
            build["distgit"],
            build.get("nvrs"),
            metrics.get("request_id"),
            metrics.get("request_start_time"),
            metrics.get("request_end_time"),
            metrics.get("wait_start_time"),
            metrics.get("wait_end_time"),
            metrics.get("download_start_time"),
            metrics.get("download_end_time"),
            _elapsed_seconds(metrics, "request_start_time", "request_end_time"),
            _elapsed_seconds(metrics, "wait_start_time", "wait_end_time"),
            _elapsed_seconds(metrics, "download_start_time", "download_end_time"),
            _elapsed_seconds(metrics, "request_start_time", "download_end_time"),
        ]
        print(", ".join(map(str, row)))
# Run the report only when executed as a script, so importing this module
# (for example to reuse parse_record_log) does not trigger network access.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment