Skip to content

Instantly share code, notes, and snippets.

@ento
Created May 1, 2019 06:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ento/b28aa1502ea6c2f872d44a58500a30ee to your computer and use it in GitHub Desktop.
Save ento/b28aa1502ea6c2f872d44a58500a30ee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import csv
import glob
import math
import statistics
import sys
from collections import namedtuple
from pathlib import Path
from subprocess import run, check_output, PIPE
# One branch directory of a multibranch job.
Branch = namedtuple(
    'Branch',
    [
        'builds',      # list of Build records found under the branch
        'name',        # name of the branch directory
        'total_size',  # size of the branch directory as reported by measure_size
    ],
)

# One build directory of a branch; every *_size field comes from measure_size.
Build = namedtuple(
    'Build',
    [
        'number',        # name of the build directory
        'paused',        # result of is_build_paused for the build
        'total_size',    # size of the whole build directory
        'stash_size',    # size of its 'stashes' subdirectory
        'archive_size',  # size of its 'archive' subdirectory
    ],
)
def measure_size(path: Path) -> int:
    """Return the disk usage of *path* in 1 KiB blocks, or -1 if it is missing.

    The figure is the first field printed by ``du -sk`` for the path.

    Args:
        path: File or directory to measure.

    Returns:
        The block count reported by ``du``, or -1 when *path* does not exist.

    Raises:
        subprocess.CalledProcessError: If ``du`` exits with a non-zero status.
    """
    if not path.exists():
        return -1
    # -k pins the unit to 1024-byte blocks; without it `du -s` reports
    # 512-byte blocks on POSIX/BSD systems and can be changed by block-size
    # environment variables on GNU systems.
    # `--` keeps a path that starts with '-' from being parsed as an option.
    du_result = check_output(['du', '-sk', '--', str(path)])
    # Output is "<blocks>\t<path>"; only the leading count is needed.
    return int(du_result.split()[0])
def is_build_paused(build_path: Path) -> bool:
    """Report whether the build's flow-node store contains a paused marker.

    Scans ``workflow/flowNodeStore.xml`` under *build_path*. The build counts
    as paused when a line that mentions ``Pause``, or the line directly before
    such a line, contains both ``endTime`` and ``>0<``.

    Args:
        build_path: Directory of a single build.

    Returns:
        True if the grep pipeline finds such a line; False if it does not,
        if any stage of the pipeline fails, or if the XML file is absent.
    """
    flow_nodes_path = build_path.joinpath('workflow', 'flowNodeStore.xml')
    if not flow_nodes_path.exists():
        return False
    # The file path is passed as positional parameter $1 rather than being
    # interpolated into the command text, so quotes, `$` or backticks in a
    # directory name cannot break or inject into the shell command.
    command = 'grep -B1 -- Pause "$1" | grep endTime | grep ">0<"'
    # capture stdout so that it doesn't contaminate the csv output
    result = run(
        ['bash', '-o', 'pipefail', '-c', command, 'bash', str(flow_nodes_path)],
        stdout=PIPE,
    )
    # With pipefail the status is 0 only when every grep stage matched.
    return result.returncode == 0
def iter_builds(branch_path: Path):
    """Yield a ``Build`` record for each build directory of a branch.

    Entries matching ``<branch_path>/builds/*`` are considered; symlinks and
    anything that is not a directory are skipped.

    Args:
        branch_path: Directory of a single branch.

    Yields:
        Build: directory name, paused flag, and the sizes of the build
        directory and of its ``stashes`` and ``archive`` subdirectories.
    """
    pattern = str(branch_path.joinpath('builds', '*'))
    for entry in glob.iglob(pattern):
        candidate = Path(entry)
        # Only real (non-symlink) directories count as builds.
        if candidate.is_symlink() or not candidate.is_dir():
            continue
        whole = measure_size(candidate)
        stashes = measure_size(candidate.joinpath('stashes'))
        archive = measure_size(candidate.joinpath('archive'))
        yield Build(
            number=candidate.name,
            paused=is_build_paused(candidate),
            total_size=whole,
            stash_size=stashes,
            archive_size=archive,
        )
def iter_branches(job_path: Path):
    """Yield a ``Branch`` record for each entry under ``<job_path>/branches``.

    Args:
        job_path: Directory of a single job.

    Yields:
        Branch: the entry's name, the list of its builds as produced by
        ``iter_builds``, and its size as reported by ``measure_size``.
    """
    for entry in glob.iglob(str(job_path.joinpath('branches', '*'))):
        branch_dir = Path(entry)
        # Builds are collected eagerly, before the branch itself is measured.
        found_builds = list(iter_builds(branch_dir))
        branch_size = measure_size(branch_dir)
        yield Branch(
            builds=found_builds,
            name=branch_dir.name,
            total_size=branch_size,
        )
def build_stats(builds, size_attr_name):
    """Summarise one size attribute across a collection of builds.

    Negative sizes are ignored: ``measure_size`` reports a missing directory
    as -1, and folding that sentinel into the figures would distort the
    average, median and minimum.

    Args:
        builds: Iterable of objects exposing the attribute *size_attr_name*.
        size_attr_name: Name of the numeric size attribute to summarise.

    Returns:
        A tuple ``(average, median, minimum, maximum)``. When there are no
        builds, or none of them has a non-negative size, every element is -1.
        The median of an even number of sizes is the mean of the two middle
        values.
    """
    sizes = sorted(
        size
        for size in (getattr(build, size_attr_name) for build in builds)
        if size >= 0
    )
    if not sizes:
        return (-1, -1, -1, -1)
    average = sum(sizes) / len(sizes)
    # sizes is sorted, so its two ends are the minimum and the maximum.
    return (average, statistics.median(sizes), sizes[0], sizes[-1])
def main(jenkins_home):
    """Write a per-branch disk-usage report as CSV to standard output.

    Every directory matching ``<jenkins_home>/jobs/*/branches`` is treated as
    a multibranch job. After a header row, one row is written per branch with
    the job name, the branch name, whether the branch name contains
    ``hourly``, the build count, the paused-build count, the branch size, and
    the ``build_stats`` figures for the total, stash and archive sizes.

    Args:
        jenkins_home: ``Path`` of the Jenkins home directory.
    """
    branch_roots = glob.glob(str(jenkins_home.joinpath('jobs', '*', 'branches')))
    out = csv.writer(sys.stdout)
    out.writerow([
        'job_name',
        'branch_name',
        'is_hourly',
        'num_builds',
        'num_paused_builds',
        'branch_size',
        'total_avg',
        'total_median',
        'total_min',
        'total_max',
        'stash_avg',
        'stash_median',
        'stash_min',
        'stash_max',
        'archive_avg',
        'archive_median',
        'archive_min',
        'archive_max',
    ])
    for branch_root in branch_roots:
        # The glob matched ".../jobs/<job>/branches"; the job is its parent.
        job_dir = Path(branch_root).parent
        for branch in iter_branches(job_dir):
            paused_count = sum(1 for build in branch.builds if build.paused)
            record = [
                job_dir.name,
                branch.name,
                'hourly' in branch.name,
                len(branch.builds),
                paused_count,
                branch.total_size,
            ]
            # Four statistics per size attribute, in header order.
            for attr in ('total_size', 'stash_size', 'archive_size'):
                record.extend(build_stats(branch.builds, attr))
            out.writerow(record)
# Script entry point: when executed directly (not imported), produce the
# report for the hard-coded Jenkins home /var/lib/jenkins.
if __name__ == '__main__':
    main(Path('/var/lib/jenkins'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment