Skip to content

Instantly share code, notes, and snippets.

@wjkennedy
Last active January 27, 2023 16:16
Show Gist options
  • Save wjkennedy/0751dfe45e35a2b054c11099d95e8516 to your computer and use it in GitHub Desktop.
import json

from alluvial import alluvial_diagram

# Build (word, project) edges: each edge links one of an issue's top words
# to the project the issue belongs to.
# NOTE(review): assumes `df` (with columns 'Project Name' and 'Top Words')
# comes from the Jira-extraction script elsewhere in this gist — confirm.
# If 'Top Words' holds (word, count) pairs from FreqDist.most_common, the
# edge source here is a tuple, not a bare word — verify upstream format.
edges = []
for _, row in df.iterrows():
    project = row['Project Name']
    top_words = row['Top Words']
    for word in top_words:
        edges.append((word, project))

# Node list is the union of every word and every project name seen in edges.
nodes = list({word for word, _ in edges} | {project for _, project in edges})

# Create the alluvial flow diagram.
fig = alluvial_diagram(edges, nodes, width=600, height=400)

# Save the diagram as a JSON file.
# Fix: the original called json.dump without importing json (NameError).
with open('alluvial_diagram.json', 'w') as f:
    json.dump(fig, f)
import plotly.graph_objs as go

# The ten word-frequency columns to plot.
top_words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']

# One marker trace per (project, word) pair. Only the first word of each
# project carries a legend entry; the remaining nine suppress theirs.
# NOTE(review): this expects `df` to have a 'Project' column plus one column
# per word above — confirm against the DataFrame produced upstream.
traces = []
for project in df['Project'].unique():
    project_df = df[df['Project'] == project]
    project_df = project_df[['Project'] + top_words]
    for idx, word in enumerate(top_words):
        if idx == 0:
            trace = go.Scatter(x=project_df['Project'], y=project_df[word], mode='markers', name=word)
        else:
            trace = go.Scatter(x=project_df['Project'], y=project_df[word], mode='markers', name=word, showlegend=False)
        traces.append(trace)

# Axis titles and overall chart title.
layout = go.Layout(title='Projects and Top Words', xaxis=dict(title='Project Name'), yaxis=dict(title='Frequency'))

# Assemble the figure and render it.
fig = go.Figure(data=traces, layout=layout)
fig.show()
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# Execute the Jira query.
# NOTE(review): search_issues returns only the first page of results
# (50 by default); see the paginated variant of this script for all issues.
issues = jira.search_issues(jira_query)

# The stop-word set is loop-invariant — build it once, not per issue.
stop_words = set(stopwords.words('english'))

# One record per issue: its key and a token-frequency mapping.
fdist_data = []
for issue in issues:
    # NOTE(review): issue.key is the *issue* key (e.g. "PROJ-123"), not a
    # project key; the 'Project Key' column name is kept for compatibility.
    project_key = issue.key
    summary = issue.fields.summary
    # Fix: fields.description is None for issues without a description;
    # the original concatenation raised TypeError in that case.
    description = issue.fields.description or ""
    # Concatenate the summary and description.
    text = summary + " " + description
    # Tokenize and drop English stop words (case-insensitive match).
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token.lower() not in stop_words]
    # Frequency distribution of the remaining tokens.
    fdist = FreqDist(tokens)
    fdist_data.append({'Project Key': project_key, 'Frequency Distribution': dict(fdist)})

# Tabulate, sort by key, display, and persist the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
df.to_csv('jira_project.csv', index=False)
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# The stop-word set is loop-invariant — build it once, not once per issue
# inside the pagination loop as the original did.
stop_words = set(stopwords.words('english'))

# One record per issue: its key and its 10 most common non-stop-word tokens.
fdist_data = []

# Page through the full result set; search_issues returns one page at a time.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    # An empty page means every result has been consumed.
    if len(issues) == 0:
        break
    for issue in issues:
        # NOTE(review): issue.key is the *issue* key, not a project key;
        # the 'Project Key' column name is kept for compatibility.
        project_key = issue.key
        summary = issue.fields.summary
        # Fix: fields.description is None for issues without a description;
        # the original concatenation raised TypeError in that case.
        description = issue.fields.description or ""
        text = summary + " " + description
        # Tokenize and drop English stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Top 10 most common remaining tokens as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Project Key': project_key, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by key, and display the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Project Key'])
print(df)
import string
from jira import JIRA
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import pandas as pd

# JQL query selecting the issues to analyse.
jira_query = 'project = "Sample Project" and status = "Closed"'
# Jira API endpoint.
jira_url = "https://jira.example.com"
# Jira credentials.
jira_username = "your_username"
jira_password = "your_password"

# Connect to Jira.
jira = JIRA(jira_url, basic_auth=(jira_username, jira_password))

# Loop-invariant lookups hoisted out of the pagination loop: the English
# stop-word set and the punctuation-stripping translation table.
stop_words = set(stopwords.words('english'))
punct_table = str.maketrans("", "", string.punctuation)

# One record per issue: key, project name, summary, and top 10 words.
fdist_data = []

# Page through the full result set; search_issues returns one page at a time.
startAt = 0
while True:
    issues = jira.search_issues(jira_query, startAt=startAt)
    # An empty page means every result has been consumed.
    if len(issues) == 0:
        break
    for issue in issues:
        issue_key = issue.key
        project_name = issue.fields.project.name
        summary = issue.fields.summary
        # Fix: fields.description is None for issues without a description;
        # the original concatenation raised TypeError in that case.
        description = issue.fields.description or ""
        # Concatenate, then strip punctuation in one C-level pass.
        text = summary + " " + description
        text = text.translate(punct_table)
        # Tokenize and drop English stop words (case-insensitive match).
        tokens = word_tokenize(text)
        tokens = [token for token in tokens if token.lower() not in stop_words]
        # Top 10 most common remaining tokens as (word, count) pairs.
        fdist = FreqDist(tokens)
        top_words = fdist.most_common(10)
        fdist_data.append({'Issue Key': issue_key, 'Project Name': project_name, 'Summary': summary, 'Top Words': top_words})
    # Advance past the page just processed.
    startAt += len(issues)

# Tabulate, sort by issue key, and display the results.
df = pd.DataFrame(fdist_data)
df = df.sort_values(by=['Issue Key'])
print(df)
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
import pandas as pd

# Demo text standing in for real Jira project content.
jira_project = "This is a sample Jira project text that we will use for demonstration purposes."
# Label attached to the demo text in the output DataFrame.
project_name = "Sample Project"

# Tokenize, then discard English stop words (case-insensitive match).
words = word_tokenize(jira_project)
english_stops = set(stopwords.words('english'))
filtered = [w for w in words if w.lower() not in english_stops]

# Frequency distribution of the remaining tokens, rendered as a dot diagram.
fdist = FreqDist(filtered)
fdist.plot(30, cumulative=False, title="Jira Project Word Frequency Distribution")
plt.show()

# One-row DataFrame: the project name plus its word-frequency mapping.
frequencies = dict(fdist)
df = pd.DataFrame({'Project Name': [project_name], 'Frequency Distribution': [frequencies]})
print(df)

# Persist the table for downstream use.
df.to_csv('jira_project.csv', index=False)
# Expand the 'Top Words' column — a list of (word, count) pairs from
# FreqDist.most_common(10) — into ten per-rank word columns via transpose:
# zip(*x) yields (words, counts); element [0] is the tuple of words.
# NOTE(review): the DataFrame built immediately above has no 'Top Words'
# column; this line only works on the df produced by the paginated Jira
# script — confirm which df is actually in scope here. Also raises if an
# issue yielded fewer than 10 top words (short pd.Series) — verify.
df[['word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10']] = df['Top Words'].apply(lambda x: pd.Series(list(zip(*x))[0]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment