Last active
December 28, 2023 16:43
-
-
Save vankeer/dbbceb051ac56adf1e9e9e350f066c79 to your computer and use it in GitHub Desktop.
GPT-based categorization of JIRA issues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
from openai import OpenAI | |
# Shared OpenAI API client; the key is read from the OPENAI_API_KEY
# environment variable (a missing variable raises KeyError here).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# CSV columns whose values are combined into the text sent for categorization
columns_to_consider = [
    "Summary",
    "Description",
    "Epic Link Summary",
    "Parent summary",
]

# Allowed categories (system components) an issue can be assigned to
system_components = [
    "Web platform",
    "Mobile app",
    "API",
    "Open API",
    "Libs",
    "Post-processor",
    "Unknown",
]
def categorize_issue(issue_description):
    """
    Categorizes a JIRA issue using OpenAI's GPT-3.5.

    Args:
        issue_description: Text describing the issue (the prompt listing the
            allowed components is appended to it).

    Returns:
        The matching entry of ``system_components``, or "Unknown" when the
        description is empty, the API call fails, or the model's reply does
        not correspond to any known component.
    """
    # Nothing to categorize: avoid a pointless (billable) API round trip.
    if not isinstance(issue_description, str) or not issue_description.strip():
        return "Unknown"

    prompt = (
        issue_description
        + "\n\nCategorize the above JIRA issue into one of the following components. Answer with ONLY ONE item from this list, nothing else:\n"
        + ", ".join(system_components)
    )

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            # Temperature 0 keeps the classification as reproducible as possible.
            temperature=0,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        # The message content may be None; treat that as an empty reply.
        reply = response.choices[0].message.content or ""
    except Exception as e:
        # Best effort: a failed request must not abort the whole CSV run.
        print(f"Error during categorization: {e}")
        return "Unknown"

    # The model often decorates its answer (quotes, trailing period, different
    # casing), so compare leniently and return the canonical component name.
    normalized = reply.strip().strip("\"'`.").strip().casefold()
    for component in system_components:
        if component.casefold() == normalized:
            return component
    return "Unknown"
def process_csv(input_file, output_file):
    """
    Reads a JIRA CSV export, categorizes every row and writes the result.

    Column headers are stripped and title-cased, each entry of
    ``columns_to_consider`` is matched case-insensitively against them, and a
    'Categorization' column is filled with the result of ``categorize_issue``
    for every row before the frame is written to ``output_file``.

    Raises:
        ValueError: if one of the configured columns is missing from the CSV.
    """
    frame = pd.read_csv(input_file, skiprows=0)

    # Show the raw headers so a column mismatch is easy to debug
    print("Columns found in CSV:", frame.columns.tolist())

    # Normalize the headers: no surrounding whitespace, title-cased
    frame.columns = [header.strip().title() for header in frame.columns]

    # Map every configured column to the first header that matches it
    actual_columns = {}
    for wanted in columns_to_consider:
        found = next(
            (
                header
                for header in frame.columns
                if header.strip().lower() == wanted.lower()
            ),
            None,
        )
        if found is None:
            raise ValueError(f"Column '{wanted}' not found in the CSV file")
        actual_columns[wanted] = found

    # Every row starts out as 'Unknown' and is overwritten below
    frame['Categorization'] = 'Unknown'

    total_rows = len(frame)
    for position, record in frame.iterrows():
        print(f"Processing row {position} out of {total_rows}...")

        # Build "<column>: <value>" lines, skipping cells that are empty (NaN)
        parts = []
        for wanted in columns_to_consider:
            value = record[actual_columns[wanted]]
            if pd.notna(value):
                parts.append(f"{wanted}: {value}")

        frame.at[position, 'Categorization'] = categorize_issue("\n".join(parts))

    # Persist the categorized issues without the index column
    frame.to_csv(output_file, index=False)
# Example usage: categorize the issues in Jira.csv and write them to Jira_output.csv
input_csv, output_csv = 'Jira.csv', 'Jira_output.csv'
process_csv(input_csv, output_csv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment