@mgd020 · Created October 30, 2019 04:17
Pull issues and worklogs from the JIRA API and index them into Elasticsearch (ES) for analysis.
"""
requirements:
jira<3
elasticsearch<7
elasticsearch-dsl<7
raven<7
python-dateutil<3
jsonpointer<3
"""
import argparse
import os
from pprint import pprint

import elasticsearch_dsl as es
import jsonpointer
from dateutil.parser import parse as parse_date
from jira import JIRA
from raven import Client

SERVER = 'https://webitau.atlassian.net'
USERNAME = os.getenv('JIRA_USERNAME')
PASSWORD = os.getenv('JIRA_PASSWORD')
ISSUE_INDEX = 'jira-issues'
WORKLOG_INDEX = 'jira-worklogs'
SENTRY_DSN = os.getenv('SENTRY_DSN')
ISSUE_STRIP_FIELDS = os.getenv('ISSUE_STRIP_FIELDS')
WORKLOG_STRIP_FIELDS = os.getenv('WORKLOG_STRIP_FIELDS')
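
# ISSUE_STRIP_FIELDS / WORKLOG_STRIP_FIELDS are comma-separated JSON Pointer
# paths that strip_fields() below nulls out before indexing or printing,
# e.g. ISSUE_STRIP_FIELDS='/fields/customfield_10000' (example pointer only).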


class Issue(es.DocType):
    # fields are added dynamically from the JIRA payload at save time;
    # 'dummy' is presumably just a placeholder so the mapping is non-empty
    dummy = es.Text()

    class Meta:
        doc_type = 'issue'


class Worklog(es.DocType):
    issue = es.Object()  # each worklog document embeds its parent issue

    class Meta:
        doc_type = 'worklog'


def strip_fields(obj, fields):
    # 'fields' is a comma-separated list of JSON Pointer paths; each path is
    # set to None in place (the key is kept, its value is cleared)
    for field in (fields or '').split(','):
        if field:
            jsonpointer.set_pointer(obj, field, None)
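
# Example with a hypothetical payload:
#   doc = {'fields': {'assignee': {'name': 'bob'}}}
#   strip_fields(doc, '/fields/assignee')
#   assert doc == {'fields': {'assignee': None}}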


def iter_pages(f, values_key, *args, **kwargs):
    # page through any python-jira method that accepts startAt and supports
    # json_result=True, yielding individual results until a page comes back empty
    kwargs['json_result'] = True
    while True:
        response = f(*args, **kwargs)
        results = response[values_key]
        if not results:
            break
        for result in results:
            yield result
        kwargs['startAt'] = response['startAt'] + len(results)
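
# For example (hypothetical JQL), this walks every matching issue without
# worrying about JIRA's page size:
#   for issue in iter_pages(client.search_issues, 'issues', 'project = ABC'):
#       print(issue['key'])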


def iter_issues(client, last_modified=None, full_worklog=True):
    jql = 'created < endOfDay()'
    if last_modified:
        jql = 'updated > "%s"' % last_modified.strftime('%Y/%m/%d %H:%M')
    jql += ' order by updated asc'  # so if we get interrupted we can continue
    for issue in iter_pages(client.search_issues, 'issues', jql, fields='*all,-comment,-description,-attachment'):
        # search results only embed the first page of worklogs; if the issue
        # has more, refetch the complete worklog list
        if full_worklog and issue['fields']['worklog']['total'] > issue['fields']['worklog']['maxResults']:
            issue['fields']['worklog'] = client._get_json('issue/%s/worklog' % issue['id'])
        yield issue
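
# Note: client._get_json() is a private python-jira helper. The public
# client.worklogs(issue_id) also fetches all of an issue's worklogs, but it
# returns Worklog resources rather than the raw JSON dict this script stores.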


def iter_worklogs(client, last_modified=None, full_worklog=True, es_host=None):
    for issue in iter_issues(client, last_modified, full_worklog):
        worklog = issue['fields'].pop('worklog', {})
        strip_fields(issue, ISSUE_STRIP_FIELDS)
        if es_host:
            Issue(meta={'id': issue['id']}, url=issue.pop('self'), **issue).save(index=ISSUE_INDEX)
            # delete all existing worklogs for this issue
            Worklog.search(index=WORKLOG_INDEX).query('match', **{'issue.id': issue['id']}).delete()
        else:
            pprint(issue)
        for wl in worklog.get('worklogs', ()):
            wl['issue'] = issue  # denormalise: each worklog carries its (stripped) issue
            yield wl


def sync(es_host=None, last_modified=None):
    client = JIRA(SERVER, basic_auth=(USERNAME, PASSWORD))
    if es_host:
        es.connections.connections.create_connection(hosts=[es_host])
        Worklog.init(index=WORKLOG_INDEX)
        Issue.init(index=ISSUE_INDEX)
    for worklog in iter_worklogs(client, last_modified, True, es_host):
        strip_fields(worklog, WORKLOG_STRIP_FIELDS)
        if es_host:
            Worklog(meta={'id': worklog['id']}, url=worklog.pop('self'), **worklog).save(index=WORKLOG_INDEX)
        else:
            pprint(worklog)  # dry run: no ES host given, just print


if __name__ == '__main__':
    client = Client(SENTRY_DSN)  # raven: report uncaught failures to Sentry
    parser = argparse.ArgumentParser(description='sync JIRA issues and worklogs into Elasticsearch')
    parser.add_argument('-e', '--es', dest='es_host', help='Elasticsearch host (omit to pretty-print instead)')
    parser.add_argument('-m', '--last_modified', type=parse_date, help='only sync issues updated since this date/time')
    try:
        sync(**vars(parser.parse_args()))
    except Exception:
        client.captureException()
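
# Usage sketch (script name and hosts are hypothetical):
#   export JIRA_USERNAME=... JIRA_PASSWORD=... SENTRY_DSN=...
#   python jira_to_es.py                                    # dry run: pprint to stdout
#   python jira_to_es.py -e localhost:9200                  # full sync into Elasticsearch
#   python jira_to_es.py -e localhost:9200 -m 2019-10-01    # incremental sync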