Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created December 6, 2020 13:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save amankharwal/6d0c8482661a190e683486b64be713ed to your computer and use it in GitHub Desktop.
Save amankharwal/6d0c8482661a190e683486b64be713ed to your computer and use it in GitHub Desktop.
import nltk
from nltk.corpus import stopwords
import string
from wordcloud import WordCloud
# Build a word-frequency table and a word cloud from the first resumes in
# resumeDataSet['Resume'].
# NOTE(review): resumeDataSet, cleanResume and plt are not defined in this
# snippet; they are presumably provided by earlier code (plt looks like
# matplotlib.pyplot) -- confirm before running this on its own.

# Upper bound on how many resumes are sampled for the statistics below.
maxResumes = 160

# English stop words plus the `` and '' quote tokens, which are filtered out
# of the frequency count together with punctuation.
oneSetOfStopWords = set(stopwords.words('english') + ['``', "''"])

totalWords = []
cleanedParts = []
Sentences = resumeDataSet['Resume'].values

# Slicing instead of indexing range(0, 160): same rows when at least 160
# resumes exist, but no IndexError when the dataset is smaller.
for resumeText in Sentences[:maxResumes]:
    cleanedText = cleanResume(resumeText)
    cleanedParts.append(cleanedText)
    requiredWords = nltk.word_tokenize(cleanedText)
    # `word not in string.punctuation` is a substring test against the
    # punctuation string, exactly as in the original filter.
    totalWords.extend(
        word
        for word in requiredWords
        if word not in oneSetOfStopWords and word not in string.punctuation
    )

# Joined once instead of repeated `+=`; the texts are concatenated with no
# separator, matching the original behaviour.
# NOTE(review): if cleanResume() output does not end in whitespace, the last
# word of one resume fuses with the first word of the next -- confirm.
cleanedSentences = "".join(cleanedParts)

# Report the 50 most frequent remaining tokens.
wordfreqdist = nltk.FreqDist(totalWords)
mostcommon = wordfreqdist.most_common(50)
print(mostcommon)

# Render the word cloud of the concatenated cleaned text.
wc = WordCloud().generate(cleanedSentences)
plt.figure(figsize=(15, 15))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment