@dblanchette
Last active September 6, 2023 18:53
import os
from datetime import datetime, timedelta
from typing import Any, Dict, Generator, List, Optional, Union

import requests

# Optional - to connect using OAuth credentials
from oauthlib.oauth1 import SIGNATURE_RSA
from requests_oauthlib import OAuth1Session


class JiraClient:
    def __init__(
        self,
        username: Optional[str] = None,
        api_token: Optional[str] = None,
        access_token: Optional[str] = None,
        access_token_secret: Optional[str] = None,
        consumer_key: Optional[str] = None,
        key_cert: Optional[str] = None,
    ):
        self._user_url = os.getenv("JIRA_SERVER", "").rstrip("/")
        self._base_url = f"{self._user_url}/rest/api/3"
        if username and api_token:
            # Basic auth with an API token
            self._session = requests.Session()
            self._session.auth = (username, api_token)
        elif access_token and access_token_secret and consumer_key and key_cert:
            # OAuth1 with an RSA key (requires the requests-oauthlib package)
            self._session = OAuth1Session(
                consumer_key,
                rsa_key=key_cert,
                resource_owner_key=access_token,
                resource_owner_secret=access_token_secret,
                signature_method=SIGNATURE_RSA,
            )
        else:
            raise ValueError("Must use API token or OAuth credentials")

    def _get_paginated_results(
        self, url: str, results_key: str, parameters: Optional[Dict[str, Union[str, int]]] = None, use_post: bool = False,
    ) -> Generator[Dict[str, Any], None, None]:
        """Get results of a paginated call that uses the 'maxResults', 'startAt', and 'total' attributes.

        :param url: URL without any pagination parameters
        :param results_key: The key of the response dict that contains the actual elements to return (varies from call to call). Ex.: "items"
        :param parameters: If use_post is False, URL parameters. If use_post is True, JSON-encoded body parameters
        :param use_post: Use POST instead of GET. Needed if the parameters are too long to fit in a URL
        """
        parameters = parameters or {}
        results_per_page = 1000
        parameters["maxResults"] = results_per_page
        start_at = 0
        while True:
            parameters["startAt"] = start_at
            if use_post:
                response = self._session.post(url, json=parameters)
            else:
                response = self._session.get(url, params=parameters)
            response.raise_for_status()
            response_json = response.json()
            results = response_json[results_key]
            if response_json["maxResults"] < results_per_page:
                # Some calls limit the maximum value of maxResults
                results_per_page = response_json["maxResults"]
                parameters["maxResults"] = results_per_page
            for result in results:
                yield result
            start_at += results_per_page
            if start_at >= response_json["total"]:
                return

    def _get_paginated_results_with_next_page_link(self, url: str) -> Generator[Dict[str, Any], None, None]:
        """Get results of a call that returns a payload with the 'lastPage' and 'nextPage' attributes"""
        is_last_page = False
        while not is_last_page:
            response = self._session.get(url)
            response.raise_for_status()
            response_json = response.json()
            for result in response_json["values"]:
                yield result
            is_last_page = response_json.get("lastPage", True)
            if not is_last_page:
                url = response_json["nextPage"]

    def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
        """Retrieve worklog objects for all worklogs that have been created or updated since the provided datetime

        Faster than getting worklogs through issues
        """
        worklog_ids: List[str] = []
        for worklog_entry in self._get_paginated_results_with_next_page_link(
            f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
        ):
            worklog_ids.append(worklog_entry["worklogId"])

        worklogs_per_page = 1000
        ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
        worklogs_by_id: Dict[str, Dict[str, Any]] = {}  # Using a dict to remove duplicates returned by the Jira API
        # This is a kind of manual pagination. The documentation only states "The returned list of worklogs is limited to 1000 items."
        # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
        for ids_to_get in ids_in_groups_per_page:
            for worklog in self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get}).json():
                # Optionally remove the worklogs you don't want (not in the right time period)
                worklogs_by_id[worklog["id"]] = worklog
        return list(worklogs_by_id.values())

    def search_issues(self, jql: str, fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Return issues that match a specified JQL query"""
        issues: List[Dict[str, Any]] = []
        parameters: Dict[str, Union[str, List[str]]] = {"jql": jql}
        if fields:
            parameters["fields"] = fields
        for issue in self._get_paginated_results(f"{self._base_url}/search", parameters=parameters, results_key="issues", use_post=True):
            issues.append(issue)
        return issues

    def retrieve_issues_for_worklogs(self, worklogs: List[Dict[str, Any]], fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Get the Issue objects referenced in a list of worklogs"""
        return self.search_issues(f"id in ({','.join(str(issue_id) for issue_id in set(worklog['issueId'] for worklog in worklogs))})", fields=fields)


# Example usage
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
client = JiraClient("me@companyname.com", "my_api_token_12345")
recent_worklogs = client.retrieve_worklogs_updated_since(datetime.now() - timedelta(days=14))
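
# A hypothetical example of the OAuth1 path (sketch only - all values below are
# placeholders; OAuth1Session comes from the requests-oauthlib package and the
# RSA key must match an application link configured in Jira):
# client = JiraClient(
#     access_token="your_access_token",
#     access_token_secret="your_access_token_secret",
#     consumer_key="your_consumer_key",
#     key_cert=open("jira_privatekey.pem").read(),
# )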
apollovy commented Jun 21, 2020

Line 3:

from typing import Any, Dict, Generator, List, Union

Line 49:

        results_per_page = 1000

Line 125:

        return self.search_issues(f"id in ({','.join(str(issue_id) for issue_id in set(worklog['issueId'] for worklog in worklogs))})", fields=fields)

@dblanchette (Author)

Thank you for the corrections @apollovy, I've edited the gist.


kolomicenko commented Oct 23, 2020

Line 100:

ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page - 1] for i in range(0, len(worklog_ids), worklogs_per_page)]

@dblanchette (Author)

Line 100:

ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page - 1] for i in range(0, len(worklog_ids), worklogs_per_page)]

Are you sure about this?

Let's say we have 32 worklogs IDs (here identified by the numbers 0 to 31)

>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]

If we set worklogs_per_page to 10, we get three lists of 10 and one list of 2:

>>> [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [30, 31]]

With your correction, we miss numbers 9, 19, and 29:

[[0, 1, 2, 3, 4, 5, 6, 7, 8], [10, 11, 12, 13, 14, 15, 16, 17, 18], [20, 21, 22, 23, 24, 25, 26, 27, 28], [30, 31]]

@kolomicenko

Of course you're right. That's not a good correction.

However, there still must be something wrong with the code, somewhere above the place I pointed out. The worklog_ids list returned by the generator has some items duplicated, like this:

>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31]

@dblanchette (Author)

I think what can happen is that new entries are added in between paginated calls, so the list shifts. Let's say we request the 100 most recent entries, then a user creates a new worklog entry and we request entries 101 to 200; entry 101 is then what was previously entry 100.

This can be solved by removing duplicates:
Line 109

return list(set(worklogs))

Makes sense?

@kolomicenko

It is reproducible, and it is actually the Jira API. It builds the nextPage URL using the timestamp of the last item on the current page. As a consequence, the next page always starts with that last item from the previous page (plus any other items with the same timestamp). I agree that removing the duplicates as you suggested makes sense.
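
For illustration, a minimal sketch of that deduplication, keyed by worklog id (assuming, as the gist's code does, that each returned worklog dict carries an "id" field; fetched_worklogs is a hypothetical list that may contain duplicates):

    worklogs_by_id = {}
    for worklog in fetched_worklogs:
        # A later duplicate simply overwrites the earlier copy under the same id
        worklogs_by_id[worklog["id"]] = worklog
    deduplicated = list(worklogs_by_id.values())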

@dblanchette (Author)

Interesting, I modified the gist. Thanks and have a nice day!

@abhii-singh

Thanks for the quick snippet. I tried using it as-is (with my personal details) and it throws an error:
An error was encountered: unhashable type: 'dict' Traceback (most recent call last): File "<stdin>", line 106, in retrieve_worklogs_updated_since TypeError: unhashable type: 'dict'

Any leads on how I can debug it?


jjoshm commented Jan 14, 2021

@singh-ab you can try this version:

def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
        """Retrieve worklog objects for all worklogs that have been created or updated since the provided datetime
        Faster than getting worklogs through issues
        """
        worklog_ids: List[str] = []
        for worklog_entry in self._get_paginated_results_with_next_page_link(
            f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
        ):
            worklog_ids.append(worklog_entry["worklogId"])

        worklogs_per_page = 1000
        ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
        worklogs: List[Dict[str, Any]] = []
        # This is kind of a manual pagination. The documentation only states "The returned list of worklogs is limited to 1000 items."
        # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
        for ids_to_get in ids_in_groups_per_page:
            for worklog in self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get}).json():
                # Optionally remove the worklogs you don't want (not in the right time period)
                worklogs.append(worklog)
        
        # Remove duplicates returned by the Jira API
        worklogs_clean: List[Dict[str, Any]] = []
        for obj in worklogs:
            if obj not in worklogs_clean:
                worklogs_clean.append(obj)

        return worklogs_clean

@dblanchette (Author)

Thanks for the quick snippet. I tried using it as-is (with my personal details) and it throws an error:
An error was encountered: unhashable type: 'dict' Traceback (most recent call last): File "<stdin>", line 106, in retrieve_worklogs_updated_since TypeError: unhashable type: 'dict'

Any leads on how I can debug it?

@singh-ab The issue should be fixed now

Thanks for the help @joshmuente!


aquac commented Dec 6, 2021

@dblanchette Thanks a lot for this gist!
Is there any reason why you are not using https://pypi.org/project/jira/?

@dblanchette (Author)

@aquac I don't know if this has been fixed since, but it did not support pagination at the time I created this gist.

@svgincan

@dblanchette Running this script returns blank. I'm just updating the script with my URL, login email address and API token.

//
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
client = JiraClient("me@companyname.com", "my_api_token_12345")
//

Am I missing something ?

@dblanchette (Author)

@svgincan The script does not return anything on its own; it is meant to be included in another project.

Depending on your needs, you may want to print recent_worklogs or process it further, for example.
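
For instance, a minimal sketch (the field names timeSpentSeconds and started below follow the Jira Cloud v3 worklog schema; adjust them to whatever your instance actually returns):

    for worklog in recent_worklogs:
        # Each worklog is a plain dict as returned by the Jira API
        print(worklog["id"], worklog.get("timeSpentSeconds"), worklog.get("started"))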
