# zinedkaloc/analyse.py

## analyse.py
import json
from collections import Counter
import re
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Words excluded from the cloud because they carry little meaning for this data set.
# NOTE(review): 'creat' may be a typo for 'create' -- kept unchanged pending confirmation.
STOP_WORDS = frozenset({'a', 'of', 'for', 'page', 'and', 'the', 'to', 'with', 'in', 'that', 'my', 'is', 'an', 'on', 'it', 'i', 'as', 'creat', 'your', 'make', 'or', 's', 'system', 'services', 'me', 'our', 'can', 'you', 'we', 'have', 'their', 'from', 'by', 'e', 'like', 'be', 'called','t','4','us','1'})


def load_contents(path='data.json'):
    """Return the 'content' string of every item under 'result' in a JSON file.

    Args:
        path: Location of the JSON document to read.

    Returns:
        A list with one 'content' value per entry of ``data['result']``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If 'result' or an item's 'content' key is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return [item['content'] for item in data['result']]


def tokenize(contents):
    """Lower-case each text and split it into runs of word characters.

    Args:
        contents: Iterable of strings.

    Returns:
        One flat list holding the tokens of all texts, in order.
    """
    return [word for content in contents for word in re.findall(r'\w+', content.lower())]


def count_words(words, stop_words=STOP_WORDS):
    """Count how often each word occurs, ignoring stop words.

    Args:
        words: Iterable of tokens.
        stop_words: Container of tokens to leave out of the count.

    Returns:
        A ``Counter`` mapping each remaining word to its frequency.
    """
    return Counter(word for word in words if word not in stop_words)


def render_word_cloud(word_count, output_path='wordcloud.png'):
    """Build a word cloud from *word_count*, save it as a PNG and display it.

    Args:
        word_count: Mapping of word to frequency.
        output_path: File the PNG image is written to.
    """
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_count)

    # Write the image first: plt.show() may block until the window is closed,
    # and the PNG should exist even if the viewer is interrupted.
    wordcloud.to_file(output_path)

    # Display the word cloud using matplotlib
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()


def main():
    """Read data.json, count the non-stop words and render the word cloud."""
    word_count = count_words(tokenize(load_contents()))
    if not word_count:
        # An empty frequency table cannot be drawn; stop with a clear message
        # rather than failing inside the plotting library.
        raise SystemExit('No words left after stop-word filtering; nothing to plot.')
    render_word_cloud(word_count)


if __name__ == '__main__':
    main()