Last active: January 27, 2023 16:16
Save wjkennedy/0751dfe45e35a2b054c11099d95e8516 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from alluvial import alluvial_diagram
import json  # was missing: json.dump is used below

# Build the word→project edge list for the alluvial diagram.
# Assumes `df` (produced by the earlier top-words step) has columns
# 'Project Name' and 'Top Words' — TODO confirm against the caller.
edges = []
for _, row in df.iterrows():
    project = row['Project Name']
    for word in row['Top Words']:
        edges.append((word, project))

# Node labels are every distinct word plus every distinct project.
nodes = list({node for edge in edges for node in edge})

# Create the alluvial flow diagram (width/height in pixels).
fig = alluvial_diagram(edges, nodes, width=600, height=400)

# Persist the figure so it can be rendered elsewhere.
# NOTE(review): assumes the figure object is JSON-serializable — confirm
# what `alluvial_diagram` returns.
with open('alluvial_diagram.json', 'w') as f:
    json.dump(fig, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.graph_objs as go

# Words expected to exist as per-word frequency columns in `df`
# alongside a 'Project' column — TODO confirm schema.
top_words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']

# One marker trace per (project, word). Only the first word of each
# project carries a legend entry, so the legend is not flooded.
# (Replaces the previous duplicated out-of-loop trace for top_words[0].)
traces = []
for project in df['Project'].unique():
    project_df = df[df['Project'] == project]
    project_df = project_df[['Project'] + top_words]
    for i, word in enumerate(top_words):
        traces.append(go.Scatter(
            x=project_df['Project'],
            y=project_df[word],
            mode='markers',
            name=word,
            showlegend=(i == 0),
        ))

# Axis titles and chart title.
layout = go.Layout(title='Projects and Top Words', xaxis=dict(title='Project Name'), yaxis=dict(title='Frequency'))

# Assemble and display the figure.
fig = go.Figure(data=traces, layout=layout)
fig.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'

# Jira API endpoint.
jira_url = "https://jira.example.com"

# Jira credentials.
# NOTE(review): hard-coded credentials — move to env vars or a secret store.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira and run the query (first page of results only).
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))
issues = jira.search_issues(jira_query)

# English stop words, computed once instead of once per issue.
stop_words = set(stopwords.words('english'))

# Per-issue word-frequency records.
fdist_data = []
for issue in issues:
    project_key = issue.key
    # Both fields can be None when empty; fall back to "" so the
    # concatenation below cannot raise TypeError.
    summary = issue.fields.summary or ""
    description = issue.fields.description or ""
    text = summary + " " + description
    # Tokenize and drop stop words (case-insensitive match).
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token.lower() not in stop_words]
    # Frequency distribution for this issue.
    fdist = FreqDist(tokens)
    fdist_data.append({'Project Key': project_key, 'Frequency Distribution': dict(fdist)})

# Tabulate, sort by issue key, show, and persist.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
df.to_csv('jira_project.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'

# Jira API endpoint.
jira_url = "https://jira.example.com"

# Jira credentials.
# NOTE(review): hard-coded credentials — move to env vars or a secret store.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# English stop words, computed once instead of once per issue.
stop_words = set(stopwords.words('english'))

# Per-issue top-word records.
fdist_data = []

# Page through the results; search_issues returns a bounded page, so we
# advance startAt until an empty page comes back.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    if len(issues) == 0:
        break
    for issue in issues:
        project_key = issue.key
        # Both fields can be None when empty; fall back to "" so the
        # concatenation below cannot raise TypeError.
        summary = issue.fields.summary or ""
        description = issue.fields.description or ""
        text = summary + " " + description
        # Tokenize and drop stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Ten most common remaining words as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Project Key': project_key, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by issue key, and show.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'

# Jira API endpoint.
jira_url = "https://jira.example.com"

# Jira credentials.
# NOTE(review): hard-coded credentials — move to env vars or a secret store.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# Hoisted loop invariants: stop-word set and punctuation-stripping table.
stop_words = set(stopwords.words('english'))
punct_table = str.maketrans("", "", string.punctuation)

# Per-issue top-word records.
fdist_data = []

# Page through the results; search_issues returns a bounded page, so we
# advance startAt until an empty page comes back.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    if len(issues) == 0:
        break
    for issue in issues:
        issue_key = issue.key
        project_name = issue.fields.project.name
        # Both fields can be None when empty; fall back to "" so the
        # concatenation below cannot raise TypeError.
        summary = issue.fields.summary or ""
        description = issue.fields.description or ""
        text = summary + " " + description
        # Strip punctuation in one C-level pass.
        text = text.translate(punct_table)
        # Tokenize and drop stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Ten most common remaining words as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Issue Key': issue_key, 'Project Name': project_name, 'Summary': summary, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by issue key, and show.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Issue Key'])
print(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
import pandas as pd

# Sample text standing in for real Jira project content.
jira_project = "This is a sample Jira project text that we will use for demonstration purposes."

# Name of the project the text belongs to.
project_name = "Sample Project"

# Tokenize, then drop English stop words (case-insensitive match).
tokens = word_tokenize(jira_project)
stop_words = set(stopwords.words('english'))
tokens = [t for t in tokens if t.lower() not in stop_words]

# Word-frequency distribution over the remaining tokens.
fdist = FreqDist(tokens)

# Dot diagram of the 30 most common words.
fdist.plot(30, cumulative=False, title="Jira Project Word Frequency Distribution")
plt.show()

# Plain-dict view of the frequency distribution.
fdist_dict = dict(fdist)

# One-row DataFrame pairing the project name with its frequency table.
df = pd.DataFrame({'Project Name': [project_name], 'Frequency Distribution': [fdist_dict]})
print(df)

# Persist the result for downstream use.
df.to_csv('jira_project.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Expand the 'Top Words' column — a list of (word, count) pairs per row —
# into ten flat columns word1..word10. Padding with None makes every row
# exactly ten entries, so rows with fewer than ten words (or none at all)
# no longer crash (`list(zip(*x))[0]` raised IndexError on an empty list).
word_cols = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']
df[word_cols] = df['Top Words'].apply(
    lambda pairs: pd.Series(([word for word, _ in pairs] + [None] * 10)[:10])
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.