Created
May 10, 2015 07:33
-
-
Save Madhuka/34fe46ce2924838949e6 to your computer and use it in GitHub Desktop.
Trying out NLTK http://madhukaudantha.blogspot.com/2015/05/natural-language-toolkit-nltk-sample.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division

# The star import is what brings the sample texts (text1 ... text9) used
# below into scope; this script relies on `text3` from it.
from nltk.book import *

# Enter a text's name to find out about it.
print("===Text Details===")
print(text3)
# Length of the text from start to finish, in terms of the words and
# punctuation symbols (tokens) that appear.
print('Length of Text: ' + str(len(text3)))
# The vocabulary of a text is just the set of its distinct tokens.
# print(sorted(set(text3)))
print('Length of Token: ' + str(len(set(text3))))
def lexical_richness(text):
    """Return the lexical richness of *text*.

    Lexical richness is the number of distinct tokens divided by the
    total number of tokens.

    Args:
        text: A sized iterable of hashable tokens (anything supporting
            ``len()`` and ``set()``).

    Returns:
        A float ratio; ``0.0`` when *text* is empty.
    """
    total = len(text)
    if not total:
        # An empty text has no tokens; avoid ZeroDivisionError.
        return 0.0
    # float() forces true division even where the module-level
    # `from __future__ import division` is not in effect.
    return len(set(text)) / float(total)
def percentage(word, text):
    """Return the percentage of *text* taken up by *word*.

    Args:
        word: The token to look for.
        text: A sequence of tokens supporting ``len()`` and ``.count()``.

    Returns:
        A float percentage; ``0.0`` when *text* is empty.
    """
    total = len(text)
    if not total:
        # An empty text contains no occurrences; avoid ZeroDivisionError.
        return 0.0
    # 100.0 forces true division even where the module-level
    # `from __future__ import division` is not in effect.
    return 100.0 * text.count(word) / total
print('Lexical richness of the text: ' + str(lexical_richness(text3)))
print('Percentage: ' + str(percentage('God', text3)) + '%')
# Count the occurrences of a word in the text.
print("===Count===")
print(text3.count("Adam"))
# concordance() shows every occurrence of a given word, together with some
# context. Here 'Adam' is searched for in 'The Book of Genesis'.
# It is called without print: per the NLTK documentation it prints its own
# output and returns None, so wrapping it in print emits a stray "None".
print("===Concordance===")
text3.concordance("Adam")
# similar() lists other words that appear in a similar range of contexts.
# Like concordance(), it prints its own output and returns None.
print("===Similar===")
text3.similar("Adam")
# Contexts that are shared by two or more words.
print("===Common Contexts===")
text3.common_contexts(["Adam", "Noah"])
# Plot where each word occurs across the text.
# NOTE(review): the NLTK book says this needs NumPy and Matplotlib - confirm
# they are installed in the target environment.
text3.dispersion_plot(["God", "Adam", "Eve", "Noah", "Abram", "Sarah", "Joseph", "Shem", "Isaac"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment