Skip to content

Instantly share code, notes, and snippets.

@Madhuka
Created May 10, 2015 07:33
Show Gist options
  • Save Madhuka/34fe46ce2924838949e6 to your computer and use it in GitHub Desktop.
Save Madhuka/34fe46ce2924838949e6 to your computer and use it in GitHub Desktop.
from __future__ import division
from nltk.book import *
# Basic facts about text3, one of the sample texts that `nltk.book` exports.
# print() is called with a single argument throughout so the script behaves
# the same under Python 2 and Python 3.
print("===Text Details===")
# Printing a text object shows its short description.
print(text3)
# Length of the text from start to finish: every word and punctuation
# symbol counts as one token.
print('Length of Text: ' + str(len(text3)))
# The vocabulary of a text is the set of distinct tokens it uses.
# print(sorted(set(text3)))  # uncomment to list the whole vocabulary
print('Length of Token: ' + str(len(set(text3))))
def lexical_richness(text):
    """Return the ratio of distinct tokens to total tokens in *text*.

    *text* is any sized iterable of hashable tokens (for example an NLTK
    text or a plain list of strings). The result lies between 0 and 1;
    an empty text yields 0.0 instead of raising ZeroDivisionError.
    """
    total = len(text)
    if total == 0:
        return 0.0
    # True division is intended here; the file enables it on Python 2 via
    # `from __future__ import division`.
    return len(set(text)) / total
def percentage(word, text):
    """Return the percentage of tokens in *text* that are exactly *word*.

    *text* must support ``len()`` and ``.count(word)`` (an NLTK text or a
    plain list both do). An empty text yields 0.0 instead of raising
    ZeroDivisionError.
    """
    total = len(text)
    if total == 0:
        return 0.0
    # True division is intended here; the file enables it on Python 2 via
    # `from __future__ import division`.
    return 100 * text.count(word) / total
print('Lexical richness of the text: ' + str(lexical_richness(text3)))
print('Percentage: ' + str(percentage('God', text3)) + '%')
# Count how many times a word occurs in the text.
print("===Count===")
print(text3.count("Adam"))
# concordance() shows every occurrence of a given word together with some
# context; here 'Adam' is looked up in text3. The method writes its own
# output and returns None, so it is called directly -- wrapping it in
# print() would add a stray "None" line.
print("===Concordance===")
text3.concordance("Adam")
# similar() lists words that appear in the same contexts as the given word.
# It also prints its result itself, so it is not wrapped in print() either.
print("===Similar===")
text3.similar("Adam")
# common_contexts() shows the contexts shared by two or more words.
print("===Common Contexts===")
text3.common_contexts(["Adam", "Noah"])
# Plot where each of these names occurs across the length of the text.
text3.dispersion_plot(["God", "Adam", "Eve", "Noah", "Abram", "Sarah", "Joseph", "Shem", "Isaac"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment