Skip to content

Instantly share code, notes, and snippets.

@vankeer
Last active December 28, 2023 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vankeer/dbbceb051ac56adf1e9e9e350f066c79 to your computer and use it in GitHub Desktop.
Save vankeer/dbbceb051ac56adf1e9e9e350f066c79 to your computer and use it in GitHub Desktop.
GPT-based categorization of JIRA issues
import os
import pandas as pd
from openai import OpenAI
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# CSV columns whose text is combined to describe each issue
columns_to_consider = [
    "Summary",
    "Description",
    "Epic Link Summary",
    "Parent summary",
]

# Components an issue can be assigned to ("Unknown" is the fallback)
system_components = [
    "Web platform",
    "Mobile app",
    "API",
    "Open API",
    "Libs",
    "Post-processor",
    "Unknown",
]
def categorize_issue(issue_description):
    """
    Categorize a JIRA issue into one of ``system_components`` using OpenAI's GPT-3.5.

    Args:
        issue_description: Text describing the issue. It is sent to the model
            followed by the categorization instruction.

    Returns:
        The matching entry of ``system_components`` in its canonical spelling,
        or "Unknown" when the reply matches no entry or the request fails.
    """
    try:
        prompt = (
            issue_description
            + "\n\nCategorize the above JIRA issue into one of the following components. Answer with ONLY ONE item from this list, nothing else:\n"
            + ", ".join(system_components)
        )
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        # The message content may be None; treat that as an empty reply.
        reply = response.choices[0].message.content or ""
    except Exception as e:
        # Best effort: a failed request must not abort the whole run.
        print(f"Error during categorization: {e}")
        return "Unknown"

    # The model often decorates its answer (quotes, trailing period, different
    # casing). Normalize before matching so such replies are not discarded.
    normalized = reply.strip().strip("\"'`.").strip().casefold()
    for component in system_components:
        if component.casefold() == normalized:
            return component
    return "Unknown"
def process_csv(input_file, output_file):
    """
    Read a JIRA CSV export, categorize each issue, and write the result.

    Every row is described by the non-empty values of the columns listed in
    ``columns_to_consider`` and passed to ``categorize_issue``; the answer is
    stored in a new 'Categorization' column.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write (without the index column).

    Raises:
        ValueError: If one of ``columns_to_consider`` is missing from the CSV.
    """
    # Read the CSV file
    df = pd.read_csv(input_file)

    # Display column headers for debugging
    print("Columns found in CSV:", df.columns.tolist())

    # Trim spaces from column headers and title-case them. Note that the
    # normalized headers are also the ones written to the output file.
    df.columns = [col.strip().title() for col in df.columns]

    # Resolve each configured column to the header actually present,
    # comparing case-insensitively; the first matching header wins.
    actual_columns = {}
    for col in columns_to_consider:
        wanted = col.lower()
        header = next((c for c in df.columns if c.lower() == wanted), None)
        if header is None:
            raise ValueError(f"Column '{col}' not found in the CSV file")
        actual_columns[col] = header

    # Add a new column for categorization
    df['Categorization'] = 'Unknown'

    # Process each row; report progress as a 1-based position
    total = len(df)
    for position, (index, row) in enumerate(df.iterrows(), start=1):
        print(f"Processing row {position} out of {total}...")
        issue_description = "\n".join(
            f"{col}: {row[actual_columns[col]]}"
            for col in columns_to_consider
            if pd.notna(row[actual_columns[col]])
        )
        if not issue_description:
            # Nothing to categorize: keep 'Unknown' and skip the API call.
            continue
        df.at[index, 'Categorization'] = categorize_issue(issue_description)

    # Save the updated DataFrame to a new CSV file
    df.to_csv(output_file, index=False)
# Example usage: categorize the issues in Jira.csv and write them to Jira_output.csv
input_csv, output_csv = 'Jira.csv', 'Jira_output.csv'
process_csv(input_file=input_csv, output_file=output_csv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment