Skip to content

Instantly share code, notes, and snippets.

@vojtechsokol
Last active September 29, 2020 15:34
Show Gist options
  • Save vojtechsokol/e427c65d541ac294d3170df7a133ac9c to your computer and use it in GitHub Desktop.
Save vojtechsokol/e427c65d541ac294d3170df7a133ac9c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
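'''
Report which GitHub "pull request opened" fedmsg messages triggered a build
of a Jenkins job and which did not.

Requirements:
    pip install jenkinsapi pytz requests
    python >= 3.7
'''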
import argparse
import logging
import pickle
from datetime import datetime
from jenkinsapi.constants import STATUS_SUCCESS
from jenkinsapi.jenkins import Jenkins
import pytz
import requests
JOB_NAME = 'ci-dispatch-github_pull_request_opened'
DATAGREPPER_URL = 'https://apps.fedoraproject.org/datagrepper'
DATAGREPPER_PAGINATION = 100
DATAGREPPER_TOPIC = 'org.fedoraproject.prod.github.pull_request.opened'
MAX_BUILDS = 10
class PullInfo:
    '''One revision of a GitHub pull request, optionally linked to a build and a message.

    Two instances compare equal when ``owner``, ``repo``, ``pull_nr`` and ``sha``
    match; the build and message attributes are ignored by the comparison.

    Attributes:
        owner: Owner part of the repository name.
        repo: Repository name.
        pull_nr: Pull request number.
        sha: Commit hash of the pull request revision.
        build_nr: Jenkins build number, or None when no build is known.
        build_time: Time of the Jenkins build, or None.
        msg_time: Time of the message, or None.
        msg_id: Datagrepper message id, or None when no message is known.
    '''

    # Defining __eq__ without __hash__ already makes instances unhashable;
    # spell it out so nobody relies on hashing these mutable records.
    __hash__ = None

    def __init__(self, owner, repo, pull_nr, sha, build_nr=None, build_time=None, msg_time=None, msg_id=None):
        self.owner = owner
        self.repo = repo
        self.pull_nr = pull_nr
        self.sha = sha
        self.build_nr = build_nr
        self.build_time = build_time
        self.msg_time = msg_time
        self.msg_id = msg_id

    def __repr__(self):
        return (f'{type(self).__name__}(owner={self.owner!r}, repo={self.repo!r},'
                f' pull_nr={self.pull_nr!r}, sha={self.sha!r},'
                f' build_nr={self.build_nr!r}, build_time={self.build_time!r},'
                f' msg_time={self.msg_time!r}, msg_id={self.msg_id!r})')

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on e.g. ``info == None``.
        if not isinstance(other, PullInfo):
            return NotImplemented
        return ((self.owner, self.repo, self.pull_nr, self.sha)
                == (other.owner, other.repo, other.pull_nr, other.sha))

    def get_pull_info(self):
        '''Return the identifying fields joined as ``owner:repo:pull_nr:sha``.'''
        return f'{self.owner}:{self.repo}:{self.pull_nr}:{self.sha}'

    def get_pull_link(self):
        '''Return the GitHub URL of this commit within the pull request.'''
        return f'https://github.com/{self.owner}/{self.repo}/pull/{self.pull_nr}/commits/{self.sha}'

    def get_datagrepper_link(self, datagrepper_url):
        '''Return the datagrepper URL of the message, or None when ``msg_id`` is not set.'''
        if self.msg_id:
            return f'{datagrepper_url}/id?id={self.msg_id}&size=extra-large'
        return None

    def get_jenkins_url(self, jenkins_url, job_name):
        '''Return the URL of the Jenkins build, or None when ``build_nr`` is not set.'''
        if self.build_nr:
            return f'{jenkins_url}/job/{job_name}/{self.build_nr}'
        return None
def _parse_args():
    '''Parse the command-line options of the script.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--jenkins', required=True, help='Jenkins URL')
    parser.add_argument('--job', default=JOB_NAME,
                        help=f'Job name (default {JOB_NAME})')
    parser.add_argument('--topic', default=DATAGREPPER_TOPIC,
                        help=f'FedMsg topic (default {DATAGREPPER_TOPIC})')
    parser.add_argument('--builds', default=MAX_BUILDS, type=int,
                        help=f'Number of build to check (default {MAX_BUILDS})')
    parser.add_argument('--dump', action='store_true', help='Pickle retrieved messages and job builds')
    return parser.parse_args()


def _fetch_builds(jenkins_url, job_name, builds_number):
    '''Collect a PullInfo for each of the first ``builds_number`` builds of the job.

    Builds are recorded regardless of their status and in the order in which
    ``job.get_build_ids()`` yields them.
    '''
    job = Jenkins(jenkins_url).get_job(job_name)
    builds = []
    logging.info('Fetching %d Jenkins job builds, it may take a while.', builds_number)
    for i, build_id in enumerate(job.get_build_ids(), start=1):
        if i > builds_number:
            break
        build = job.get_build(build_id)
        # The last whitespace-separated token of the build name carries six
        # colon-separated fields; the middle four identify the pull request.
        pull_id = build.name.split()[-1]
        _, owner, repo, pull_nr, sha, _ = pull_id.split(':')
        builds.append(PullInfo(owner, repo, pull_nr, sha,
                               build_nr=build.buildno, build_time=build.get_timestamp()))
        if i % 10 == 0:
            logging.info('%d Jenkins job builds fetched.', i)
    return builds


def _fetch_page(query_params):
    '''Fetch one page of raw messages from datagrepper and return the decoded JSON.'''
    response = requests.get(DATAGREPPER_URL + '/raw', params=query_params)
    # Fail loudly on an HTTP error instead of tripping over an unexpected body later.
    response.raise_for_status()
    page = response.json()
    logging.info('%d messages fetched.', page['count'])
    return page


def _parse_messages(page):
    '''Convert the raw messages of one datagrepper page into PullInfo objects.'''
    parsed = []
    for message in page['raw_messages']:
        m = message['msg']
        owner, repo = m['repository']['full_name'].split('/')
        # Stringified so that it compares equal to the number parsed from a build name.
        pull_nr = str(m['pull_request']['number'])
        sha = m['pull_request']['head']['sha']
        msg_time = datetime.fromtimestamp(message['timestamp'], pytz.utc)
        parsed.append(PullInfo(owner, repo, pull_nr, sha, msg_id=message['msg_id'], msg_time=msg_time))
    return parsed


def _fetch_messages(topic, from_timestamp, to_timestamp):
    '''Collect a PullInfo for every ``topic`` message between the two timestamps.'''
    logging.info('Fetching messages from datagrepper, it may take a while.')
    query_params = {
        'topic': topic,
        'start': from_timestamp,
        'end': to_timestamp,
        'rows_per_page': DATAGREPPER_PAGINATION,
    }
    page = _fetch_page(query_params)
    messages = _parse_messages(page)
    # The first response tells how many pages there are; fetch the rest.
    for page_nr in range(2, page['pages'] + 1):
        query_params['page'] = page_nr
        page = _fetch_page(query_params)
        messages.extend(_parse_messages(page))
    logging.info('%d messages fetched in total.', page['total'])
    return messages


def _merge(builds, messages):
    '''Pair builds with their messages and return the messages seen along the way.

    Assumes messages on fedmsg are always present and only Jenkins builds can
    be missing, and that builds and messages come in the same order. Each
    build consumes messages until it finds its own: the skipped messages are
    returned as they are (no build), the matching one is returned as the build
    enriched with ``msg_time`` and ``msg_id``. Messages left over after the
    last build are not returned.
    '''
    merged = []
    # One shared iterator: every build continues where the previous one stopped.
    remaining = iter(messages)
    for build in builds:
        for message in remaining:
            if build == message:
                build.msg_time = message.msg_time
                build.msg_id = message.msg_id
                merged.append(build)
                break
            merged.append(message)
        else:
            # The ordering assumption did not hold for this build; every
            # message after the previous match has been reported as build-less.
            logging.warning('No message matches Jenkins build %s (%s).',
                            build.build_nr, build.get_pull_info())
    return merged


def _print_report(merged, jenkins_url, job_name):
    '''Print the messages with a triggered build, then those without one.'''
    print('messages with triggered builds')
    for message in merged:
        if message.build_nr:
            print('\t{}'.format(message.get_pull_link()))
            print('\t{}'.format(message.msg_time))
            print('\t{}'.format(message.get_datagrepper_link(DATAGREPPER_URL)))
            print('\t{}'.format(message.get_jenkins_url(jenkins_url, job_name)))
            print()
    print('messages without triggered builds')
    for message in merged:
        if not message.build_nr:
            print('\t{}'.format(message.get_pull_link()))
            print('\t{}'.format(message.msg_time))
            print('\t{}'.format(message.get_datagrepper_link(DATAGREPPER_URL)))
            print()


def main():
    '''Compare Jenkins job builds with datagrepper messages and print a report.

    Returns:
        1 when no Jenkins build could be fetched, None otherwise.
    '''
    args = _parse_args()
    logging.basicConfig(format='%(levelname)-8s: %(message)s')
    logging.getLogger().setLevel(logging.INFO)

    # collect info about builds of jenkins job
    builds = _fetch_builds(args.jenkins, args.job, args.builds)
    if not builds:
        # Without builds there is no time window to query messages for.
        logging.error('No Jenkins builds fetched for job %s, nothing to compare.', args.job)
        return 1

    # collect info about messages from fedmsg; the window spans from the last
    # fetched build to the first one, widened by a second on both sides
    from_timestamp = int(builds[-1].build_time.timestamp() - 1)
    to_timestamp = int(builds[0].build_time.timestamp() + 1)
    if to_timestamp <= from_timestamp:
        raise RuntimeError('Jenkins builds are not ordered newest first, cannot derive the time window.')
    messages = _fetch_messages(args.topic, from_timestamp, to_timestamp)

    # dump collected info (before merging, which modifies the builds)
    if args.dump:
        with open('builds.pickle', 'wb') as f:
            pickle.dump(builds, f)
        with open('messages.pickle', 'wb') as f:
            pickle.dump(messages, f)

    # merge builds and messages, then print matching and missing messages
    merged = _merge(builds, messages)
    _print_report(merged, args.jenkins, args.job)
    return None


if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment