Created
August 15, 2023 20:13
-
-
Save zinedkaloc/631f8e18322e792009125fa2f619b2d0 to your computer and use it in GitHub Desktop.
Analyse data.json and generate a word cloud.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import re
from collections import Counter

import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Words excluded from the cloud: common English filler plus a few
# corpus-specific tokens.  Fragments such as 's', 't' and 'e' are left-overs
# of splitting contractions/abbreviations on non-word characters.
# NOTE(review): 'creat' looks like a typo for 'create' -- kept as-is because
# the intent cannot be confirmed from this file.
STOP_WORDS = frozenset({'a', 'of', 'for', 'page', 'and', 'the', 'to', 'with', 'in', 'that', 'my', 'is', 'an', 'on', 'it', 'i', 'as', 'creat', 'your', 'make', 'or', 's', 'system', 'services', 'me', 'our', 'can', 'you', 'we', 'have', 'their', 'from', 'by', 'e', 'like', 'be', 'called','t','4','us','1'})

# Compiled once; a "word" is any run of word characters.
_WORD_RE = re.compile(r'\w+')


def load_contents(path='data.json'):
    """Return the 'content' value of every entry under 'result' in *path*.

    The file is read as UTF-8 so the result does not depend on the
    platform's default encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if 'result' or an entry's 'content' key is missing.
    """
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return [item['content'] for item in data['result']]


def count_words(contents, stop_words=STOP_WORDS):
    """Count lower-cased words across *contents*, skipping *stop_words*.

    Args:
        contents: iterable of text strings.
        stop_words: container of lower-case words to leave out.

    Returns:
        A ``Counter`` mapping each remaining word to its frequency.
    """
    return Counter(
        word
        for content in contents
        for word in _WORD_RE.findall(content.lower())
        if word not in stop_words
    )


def main():
    """Build a word cloud from data.json, save it as a PNG and display it."""
    word_count = count_words(load_contents('data.json'))

    # Fail with a clear message instead of an opaque error from the
    # word cloud library when there is nothing to draw.
    if not word_count:
        raise SystemExit('No words left to plot after removing stop words.')

    # Generate the word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_count)

    # Save the PNG before showing: plt.show() may block until the window is
    # closed, and an interrupted session would otherwise lose the image.
    wordcloud.to_file('wordcloud.png')

    # Display the word cloud using matplotlib
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment