Skip to content

Instantly share code, notes, and snippets.

@nikhilkumarsingh
Created September 10, 2017 06:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nikhilkumarsingh/f8cc0590b1b7967ee172a5a0e1a8507b to your computer and use it in GitHub Desktop.
Save nikhilkumarsingh/f8cc0590b1b7967ee172a5a0e1a8507b to your computer and use it in GitHub Desktop.
Word cloud of Wikipedia articles
from os import path
import numpy as np
from PIL import Image
import wikipedia
from wordcloud import WordCloud, STOPWORDS
# directory containing this script; the mask image is read from here and
# the generated word cloud is written here
currdir = path.split(__file__)[0]
def get_wiki(query):
    """Return the text content of the Wikipedia article best matching *query*.

    Args:
        query: Search string passed to ``wikipedia.search``.

    Returns:
        The ``content`` attribute of the page for the first search result.

    Raises:
        IndexError: If the search returns no results.
        Errors raised by ``wikipedia.page`` itself propagate unchanged.
    """
    # get best matching title for given query
    results = wikipedia.search(query)
    if not results:
        # Same exception type a bare ``[0]`` lookup would raise, but with a
        # message that names the query that failed.
        raise IndexError(
            "no Wikipedia article found for query {!r}".format(query)
        )
    title = results[0]
    # get wikipedia page for selected title; the title comes verbatim from
    # the search results, so disable auto-suggestion, which could otherwise
    # replace it with a different article title
    page = wikipedia.page(title, auto_suggest=False)
    return page.content
def create_wordcloud(text):
    """Render *text* as a word cloud image and save it to disk.

    The mask image is read from ``cloud1.png`` in ``currdir`` and the result
    is written to ``wcloud.png`` in the same directory.

    Args:
        text: Text passed to ``WordCloud.generate``.
    """
    # create numpy array for wordcloud mask image; the context manager
    # closes the image file once its pixel data has been copied
    with Image.open(path.join(currdir, "cloud1.png")) as mask_image:
        mask = np.array(mask_image)
    # create set of stopwords
    stopwords = set(STOPWORDS)
    # create wordcloud object
    wc = WordCloud(
        background_color="white",
        max_words=200,
        mask=mask,
        stopwords=stopwords,
    )
    # generate wordcloud
    wc.generate(text)
    # save wordcloud
    wc.to_file(path.join(currdir, "wcloud.png"))
if __name__ == "__main__":
    # fetch the article text and render it straight into a word cloud image
    create_wordcloud(get_wiki("python programming language"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment