Skip to content

Instantly share code, notes, and snippets.

@wjkennedy
Last active January 27, 2023 16:16
Show Gist options
  • Save wjkennedy/0751dfe45e35a2b054c11099d95e8516 to your computer and use it in GitHub Desktop.
import json

from alluvial import alluvial_diagram

# Build (word, project) edges: each edge links one of an issue's top words
# to the project the issue belongs to.
# NOTE(review): assumes `df` (with columns 'Project Name' and 'Top Words')
# comes from the Jira-extraction script elsewhere in this gist — confirm.
# If 'Top Words' holds (word, count) pairs from FreqDist.most_common, the
# edge source here is a tuple, not a bare word — verify upstream format.
edges = []
for _, row in df.iterrows():
    project = row['Project Name']
    top_words = row['Top Words']
    for word in top_words:
        edges.append((word, project))

# Node list is the union of every word and every project name seen in edges.
nodes = list({word for word, _ in edges} | {project for _, project in edges})

# Create the alluvial flow diagram.
fig = alluvial_diagram(edges, nodes, width=600, height=400)

# Save the diagram as a JSON file.
# Fix: the original called json.dump without importing json (NameError).
with open('alluvial_diagram.json', 'w') as f:
    json.dump(fig, f)
import plotly.graph_objs as go

# The ten word-frequency columns to plot.
top_words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']

# One marker trace per (project, word) pair. Only the first word of each
# project carries a legend entry; the remaining nine suppress theirs.
# NOTE(review): this expects `df` to have a 'Project' column plus one column
# per word above — confirm against the DataFrame produced upstream.
traces = []
for project in df['Project'].unique():
    project_df = df[df['Project'] == project]
    project_df = project_df[['Project'] + top_words]
    for idx, word in enumerate(top_words):
        if idx == 0:
            trace = go.Scatter(x=project_df['Project'], y=project_df[word], mode='markers', name=word)
        else:
            trace = go.Scatter(x=project_df['Project'], y=project_df[word], mode='markers', name=word, showlegend=False)
        traces.append(trace)

# Axis titles and overall chart title.
layout = go.Layout(title='Projects and Top Words', xaxis=dict(title='Project Name'), yaxis=dict(title='Frequency'))

# Assemble the figure and render it.
fig = go.Figure(data=traces, layout=layout)
fig.show()
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# Execute the Jira query.
# NOTE(review): search_issues returns only the first page of results
# (50 by default); see the paginated variant of this script for all issues.
issues = jira.search_issues(jira_query)

# The stop-word set is loop-invariant — build it once, not per issue.
stop_words = set(stopwords.words('english'))

# One record per issue: its key and a token-frequency mapping.
fdist_data = []
for issue in issues:
    # NOTE(review): issue.key is the *issue* key (e.g. "PROJ-123"), not a
    # project key; the 'Project Key' column name is kept for compatibility.
    project_key = issue.key
    summary = issue.fields.summary
    # Fix: fields.description is None for issues without a description;
    # the original concatenation raised TypeError in that case.
    description = issue.fields.description or ""
    # Concatenate the summary and description.
    text = summary + " " + description
    # Tokenize and drop English stop words (case-insensitive match).
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token.lower() not in stop_words]
    # Frequency distribution of the remaining tokens.
    fdist = FreqDist(tokens)
    fdist_data.append({'Project Key': project_key, 'Frequency Distribution': dict(fdist)})

# Tabulate, sort by key, display, and persist the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
df.to_csv('jira_project.csv', index=False)
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# The stop-word set is loop-invariant — build it once, not once per issue
# inside the pagination loop as the original did.
stop_words = set(stopwords.words('english'))

# One record per issue: its key and its 10 most common non-stop-word tokens.
fdist_data = []

# Page through the full result set; search_issues returns one page at a time.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    # An empty page means every result has been consumed.
    if len(issues) == 0:
        break
    for issue in issues:
        # NOTE(review): issue.key is the *issue* key, not a project key;
        # the 'Project Key' column name is kept for compatibility.
        project_key = issue.key
        summary = issue.fields.summary
        # Fix: fields.description is None for issues without a description;
        # the original concatenation raised TypeError in that case.
        description = issue.fields.description or ""
        text = summary + " " + description
        # Tokenize and drop English stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Top 10 most common remaining tokens as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Project Key': project_key, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by key, and display the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
import string
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# Loop-invariant lookups hoisted out of the pagination loop: the English
# stop-word set and the punctuation-stripping translation table.
stop_words = set(stopwords.words('english'))
punct_table = str.maketrans("", "", string.punctuation)

# One record per issue: key, project name, summary, and top 10 words.
fdist_data = []

# Page through the full result set; search_issues returns one page at a time.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    # An empty page means every result has been consumed.
    if len(issues) == 0:
        break
    for issue in issues:
        issue_key = issue.key
        project_name = issue.fields.project.name
        summary = issue.fields.summary
        # Fix: fields.description is None for issues without a description;
        # the original concatenation raised TypeError in that case.
        description = issue.fields.description or ""
        # Concatenate, then strip punctuation in one C-level pass.
        text = summary + " " + description
        text = text.translate(punct_table)
        # Tokenize and drop English stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Top 10 most common remaining tokens as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Issue Key': issue_key, 'Project Name': project_name, 'Summary': summary, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by issue key, and display the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Issue Key'])
print(df)
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
import pandas as pd

# Demo text standing in for real Jira project content.
jira_project = "This is a sample Jira project text that we will use for demonstration purposes."
# Label attached to the demo text in the output DataFrame.
project_name = "Sample Project"

# Tokenize, then discard English stop words (case-insensitive match).
words = word_tokenize(jira_project)
english_stops = set(stopwords.words('english'))
filtered = [w for w in words if w.lower() not in english_stops]

# Frequency distribution of the remaining tokens, rendered as a dot diagram.
fdist = FreqDist(filtered)
fdist.plot(30, cumulative=False, title="Jira Project Word Frequency Distribution")
plt.show()

# One-row DataFrame: the project name plus its word-frequency mapping.
frequencies = dict(fdist)
df = pd.DataFrame({'Project Name': [project_name], 'Frequency Distribution': [frequencies]})
print(df)

# Persist the table for downstream use.
df.to_csv('jira_project.csv', index=False)
# Expand the 'Top Words' column — a list of (word, count) pairs from
# FreqDist.most_common(10) — into ten per-rank word columns via transpose:
# zip(*x) yields (words, counts); element [0] is the tuple of words.
# NOTE(review): the DataFrame built immediately above has no 'Top Words'
# column; this line only works on the df produced by the paginated Jira
# script — confirm which df is actually in scope here. Also raises if an
# issue yielded fewer than 10 top words (short pd.Series) — verify.
df[['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']] = df['Top Words'].apply(lambda x: pd.Series(list(zip(*x))[0]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment