@mgd020 · Created October 30, 2019 04:17
Pull issues and worklogs from the JIRA API and index them into Elasticsearch (ES) for analysis.
"""
requirements:
jira<3
elasticsearch<7
elasticsearch-dsl<7
raven<7
python-dateutil<3
jsonpointer<3
"""
import argparse
import os
from pprint import pprint

import elasticsearch_dsl as es
import jsonpointer
from dateutil.parser import parse as parse_date
from jira import JIRA
from raven import Client

SERVER = 'https://webitau.atlassian.net'
USERNAME = os.getenv('JIRA_USERNAME')
PASSWORD = os.getenv('JIRA_PASSWORD')
ISSUE_INDEX = 'jira-issues'
WORKLOG_INDEX = 'jira-worklogs'
SENTRY_DSN = os.getenv('SENTRY_DSN')
ISSUE_STRIP_FIELDS = os.getenv('ISSUE_STRIP_FIELDS')
WORKLOG_STRIP_FIELDS = os.getenv('WORKLOG_STRIP_FIELDS')
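
# ISSUE_STRIP_FIELDS / WORKLOG_STRIP_FIELDS are comma-separated JSON Pointer
# paths that strip_fields() below nulls out before indexing or printing,
# e.g. ISSUE_STRIP_FIELDS='/fields/customfield_10000' (example pointer only).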


class Issue(es.DocType):
    # fields are added dynamically from the JIRA payload at save time;
    # 'dummy' is presumably just a placeholder so the mapping is non-empty
    dummy = es.Text()

    class Meta:
        doc_type = 'issue'


class Worklog(es.DocType):
    issue = es.Object()  # each worklog document embeds its parent issue

    class Meta:
        doc_type = 'worklog'


def strip_fields(obj, fields):
    # 'fields' is a comma-separated list of JSON Pointer paths; each path is
    # set to None in place (the key is kept, its value is cleared)
    for field in (fields or '').split(','):
        if field:
            jsonpointer.set_pointer(obj, field, None)
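
# Example with a hypothetical payload:
#   doc = {'fields': {'assignee': {'name': 'bob'}}}
#   strip_fields(doc, '/fields/assignee')
#   assert doc == {'fields': {'assignee': None}}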


def iter_pages(f, values_key, *args, **kwargs):
    # page through any python-jira method that accepts startAt and supports
    # json_result=True, yielding individual results until a page comes back empty
    kwargs['json_result'] = True
    while True:
        response = f(*args, **kwargs)
        results = response[values_key]
        if not results:
            break
        for result in results:
            yield result
        kwargs['startAt'] = response['startAt'] + len(results)
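
# For example (hypothetical JQL), this walks every matching issue without
# worrying about JIRA's page size:
#   for issue in iter_pages(client.search_issues, 'issues', 'project = ABC'):
#       print(issue['key'])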


def iter_issues(client, last_modified=None, full_worklog=True):
    jql = 'created < endOfDay()'
    if last_modified:
        jql = 'updated > "%s"' % last_modified.strftime('%Y/%m/%d %H:%M')
    jql += ' order by updated asc'  # so if we get interrupted we can continue
    for issue in iter_pages(client.search_issues, 'issues', jql, fields='*all,-comment,-description,-attachment'):
        # search results only embed the first page of worklogs; if the issue
        # has more, refetch the complete worklog list
        if full_worklog and issue['fields']['worklog']['total'] > issue['fields']['worklog']['maxResults']:
            issue['fields']['worklog'] = client._get_json('issue/%s/worklog' % issue['id'])
        yield issue
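
# Note: client._get_json() is a private python-jira helper. The public
# client.worklogs(issue_id) also fetches all of an issue's worklogs, but it
# returns Worklog resources rather than the raw JSON dict this script stores.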


def iter_worklogs(client, last_modified=None, full_worklog=True, es_host=None):
    for issue in iter_issues(client, last_modified, full_worklog):
        worklog = issue['fields'].pop('worklog', {})
        strip_fields(issue, ISSUE_STRIP_FIELDS)
        if es_host:
            Issue(meta={'id': issue['id']}, url=issue.pop('self'), **issue).save(index=ISSUE_INDEX)
            # delete all existing worklogs for this issue
            Worklog.search(index=WORKLOG_INDEX).query('match', **{'issue.id': issue['id']}).delete()
        else:
            pprint(issue)
        for wl in worklog.get('worklogs', ()):
            wl['issue'] = issue  # denormalise: each worklog carries its (stripped) issue
            yield wl


def sync(es_host=None, last_modified=None):
    client = JIRA(SERVER, basic_auth=(USERNAME, PASSWORD))
    if es_host:
        es.connections.connections.create_connection(hosts=[es_host])
        Worklog.init(index=WORKLOG_INDEX)
        Issue.init(index=ISSUE_INDEX)
    for worklog in iter_worklogs(client, last_modified, True, es_host):
        strip_fields(worklog, WORKLOG_STRIP_FIELDS)
        if es_host:
            Worklog(meta={'id': worklog['id']}, url=worklog.pop('self'), **worklog).save(index=WORKLOG_INDEX)
        else:
            pprint(worklog)  # dry run: no ES host given, just print


if __name__ == '__main__':
    client = Client(SENTRY_DSN)  # raven: report uncaught failures to Sentry
    parser = argparse.ArgumentParser(description='sync JIRA issues and worklogs into Elasticsearch')
    parser.add_argument('-e', '--es', dest='es_host', help='Elasticsearch host (omit to pretty-print instead)')
    parser.add_argument('-m', '--last_modified', type=parse_date, help='only sync issues updated since this date/time')
    try:
        sync(**vars(parser.parse_args()))
    except Exception:
        client.captureException()
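
# Usage sketch (script name and hosts are hypothetical):
#   export JIRA_USERNAME=... JIRA_PASSWORD=... SENTRY_DSN=...
#   python jira_to_es.py                                    # dry run: pprint to stdout
#   python jira_to_es.py -e localhost:9200                  # full sync into Elasticsearch
#   python jira_to_es.py -e localhost:9200 -m 2019-10-01    # incremental sync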