# haroonrasheed333/test.py

## test.py
from __future__ import division
from nltk.book import *


# Function to calculate the lexical diversity of a text
def lexical_diversity(text):
    """Return the average number of occurrences per distinct token in *text*.

    Computed as ``len(text) / len(set(text))``.

    Args:
        text: A sized iterable of hashable tokens (a list of words, a string,
            or one of the ``nltk.book`` text objects).

    Returns:
        The ratio as a float, or ``0.0`` when *text* has no tokens (the ratio
        is undefined there and would otherwise raise ZeroDivisionError).
    """
    vocabulary = set(text)
    if not vocabulary:
        return 0.0
    # float() keeps this a true division even if the module-level
    # ``from __future__ import division`` is not in effect.
    return len(text) / float(len(vocabulary))


def percentage(count, total):
    """Return *count* expressed as a percentage of *total*.

    Args:
        count: The part (for example, occurrences of one word).
        total: The whole (for example, the number of tokens in a text).

    Returns:
        ``100 * count / total`` as a float, or ``0.0`` when *total* is zero
        so that an empty text does not raise ZeroDivisionError.
    """
    if not total:
        return 0.0
    # 100.0 forces true division regardless of the interpreter's default.
    return 100.0 * count / total


def text_stats(text, word):
    """Print summary statistics for *word* in *text* and plot its dispersion.

    Prints the text's name, its total number of tokens, the size of its
    vocabulary, how often *word* occurs (as a count and as a percentage) and
    the concordance of *word*, then draws the dispersion plot for *word*.

    Args:
        text: A text object such as ``text1`` from ``nltk.book``. It must
            provide ``name``, ``len()``, iteration, ``count()``,
            ``concordance()`` and ``dispersion_plot()``.
        word: The token to look up in *text*.

    Returns:
        None. All results are printed or plotted.
    """
    num_words = len(text)
    vocab_size = len(set(text))
    count_word = text.count(word)
    percent_word = percentage(count_word, num_words)
    word_label = str(word)

    print("Name of Text: " + text.name)
    print("Number of words in text: " + str(num_words))
    print("The size of vocabulary: " + str(vocab_size))
    print("Number of occurrence of the word " + word_label + " in the text: " + str(count_word))
    print("Percentage of occurrence of the word " + word_label + " in the text: " + str(percent_word))
    print("The concordance of the word " + word_label)
    # concordance() prints its matches itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None".
    text.concordance(word)
    text.dispersion_plot([word])


def main():
    """Run the section 1.1-1.3 demonstrations on the ``nltk.book`` sample data.

    Everything is written to stdout or shown as a plot; nothing is returned.
    Relies on the names brought in by ``from nltk.book import *`` (``text1``
    to ``text5``, ``sent1``, ``sent3`` and ``FreqDist``) and on the helpers
    ``percentage``, ``lexical_diversity`` and ``text_stats`` of this module.
    """
    # 1.1 Expressions
    print("SECTION 1.1")
    print("Expressions")
    # Call form of print (not the Python 2 print statement) so these lines
    # match the rest of the file and also parse under Python 3.
    print(1 + 5 * 2 - 3)
    print(2 * 3 + 5 - 2 + (4 / 2))

    # 1.1 Concordance - occurrences of a word in the text along with context.
    # concordance(), similar() and common_contexts() print their own results
    # and return None, so they are called directly; wrapping them in print()
    # would emit a stray "None" after each listing.
    print("Concordance of a word in text")
    print("Concordance of the word young in text1 - " + text1.name)
    text1.concordance("young")
    print("Concordance of the word affection in text2 - " + text2.name)
    text2.concordance("affection")
    print("Concordance of the word lived in text3 - " + text3.name)
    text3.concordance("lived")

    # Concordance for the word "love", limited to 5 lines of width 50
    text1.concordance("love", width=50, lines=5)

    # 1.1 Words that occur in the same range of contexts as "extremely" in text2
    print("Words in same range of contexts as extremely in text2 - " + text2.name)
    text2.similar("extremely")

    # 1.1 Common contexts shared by the words "extremely" and "so" in text2
    print("Common contexts shared by words extremely and so in text2 - " + text2.name)
    text2.common_contexts(["extremely", "so"])

    # Lexical dispersion plot of the given words in text
    text2.dispersion_plot(['very', 'exceedingly', 'heartily', 'remarkably', 'monstrous', 'so'])
    text4.dispersion_plot(['liberty', 'constitution'])

    # Number and percentage of occurrence of the word 'lol' in text5
    count_lol = text5.count('lol')
    print("Number of occurrences of the word lol in text5: " + str(count_lol))
    # Reuse the shared helper and the count computed above.
    percent_lol = percentage(count_lol, len(text5))
    print("Percentage of occurrence of the word lol in text5: " + str(percent_lol))

    # Lexical diversity of text3 and text5
    print("Lexical Diversity of text3: " + str(lexical_diversity(text3)))
    print("Lexical Diversity of text5: " + str(lexical_diversity(text5)))

    # Percentage of occurrence of the word 'a' in text4
    percent_a = percentage(text4.count('a'), len(text4))
    print("Percentage of occurrence of the word a in text4: " + str(percent_a))

    # 1.2 Lists
    # Define a list ex1
    ex1 = [
        'Courage', 'is', 'not', 'the', 'absence', 'of', 'fear', 'but',
        'rather', 'the', 'judgement', 'that', 'something', 'else', 'is',
        'more', 'important', 'than', 'fear',
    ]

    # Print the list, its sorted form and its length
    print("New List ex1: " + str(ex1))
    print("Sorted list of ex1: " + str(sorted(ex1)))
    print("Length of ex1: " + str(len(ex1)))

    # Print the size of the vocabulary of ex1
    print("Number of vocabulary items in ex1: " + str(len(set(ex1))))

    # Concatenate two lists
    print("Concatenating lists")
    print(
        ['Courage', 'is', 'not', 'the', 'absence', 'of', 'fear', 'but', 'rather']
        + ['the', 'judgement', 'that', 'something', 'else', 'is', 'more', 'important', 'than', 'fear']
    )
    print(sent1 + sent3)

    ex2 = ['What', 'Does', 'Not', 'Kill', 'Me', 'Makes', 'Me', 'Stronger']
    print("New list ex2: " + str(ex2))

    # Print the index of word 'Kill' in ex2
    print("Index of the word Kill in ex2: " + str(ex2.index('Kill')))

    # Print the item in index 3. (index starts from 0)
    print("Item in index 3 of ex2: " + str(ex2[3]))

    # Access sublists using slicing
    print("Access sublists using slicing: ")
    print(ex2[1:3])
    print(ex2[4:7])
    print(ex2[:3])
    print(ex2[3:])

    # Replace items in an index with another
    ex2[4] = 'you'
    ex2[6] = 'you'
    print("Replace some items in ex2: ")
    print(ex2)

    # Replace a sublist (slice) with another
    print("Replacing a sublist(slice) in ex2")
    ex2[1:3] = ['Doesnt']
    print(ex2)

    # 1.3
    # Frequency Distribution - frequency of each vocabulary item in the text
    fdist = FreqDist(text2)

    # Store the vocabulary items of text2 in variable vocab. list() makes the
    # result sliceable even when keys() returns a view instead of a list.
    # NOTE(review): the ordering of the keys presumably depends on the
    # installed NLTK version (frequency-sorted vs. insertion order) - confirm
    # if a frequency ranking is expected here.
    vocab = list(fdist.keys())

    # Print the first 30 vocabulary items
    print("First 30 vocabulary items of text2 - " + text2.name)
    print(vocab[:30])

    # Print the number of occurrences of word 'weakness'
    print("Number of occurrences of the word weakness in text2: " + str(fdist['weakness']))

    # Cumulative frequency plot of the top 50 high frequency words in the text
    fdist.plot(50, cumulative=True)

    # Print a sorted list of all the words greater than length 14 in text2
    vocabulary = set(text2)
    long_words = [token for token in vocabulary if len(token) > 14]
    print("Sorted list of all words greater than length 14 in text2 - " + text2.name)
    print(sorted(long_words))

    print("Function to print text stats: ")
    text_stats(text5, 'hello')

# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	from __future__ import division
	from nltk.book import *


	# Function to calculate the lexical diversity of a text
	def lexical_diversity(text):
		"""Return the average number of occurrences per distinct token in *text*.

		Computed as ``len(text) / len(set(text))``.

		Args:
			text: A sized iterable of hashable tokens (a list of words, a
				string, or one of the ``nltk.book`` text objects).

		Returns:
			The ratio as a float, or ``0.0`` when *text* has no tokens (the
			ratio is undefined there and would otherwise raise
			ZeroDivisionError).
		"""
		vocabulary = set(text)
		if not vocabulary:
			return 0.0
		# float() keeps this a true division even if
		# ``from __future__ import division`` is not in effect.
		return len(text) / float(len(vocabulary))


	def percentage(count, total):
		"""Return *count* expressed as a percentage of *total*.

		Args:
			count: The part (for example, occurrences of one word).
			total: The whole (for example, the number of tokens in a text).

		Returns:
			``100 * count / total`` as a float, or ``0.0`` when *total* is
			zero so that an empty text does not raise ZeroDivisionError.
		"""
		if not total:
			return 0.0
		# 100.0 forces true division regardless of the interpreter's default.
		return 100.0 * count / total


	def text_stats(text, word):
		"""Print summary statistics for *word* in *text* and plot its dispersion.

		Prints the text's name, its total number of tokens, the size of its
		vocabulary, how often *word* occurs (as a count and as a percentage)
		and the concordance of *word*, then draws the dispersion plot for
		*word*.

		Args:
			text: A text object such as ``text1`` from ``nltk.book``. It must
				provide ``name``, ``len()``, iteration, ``count()``,
				``concordance()`` and ``dispersion_plot()``.
			word: The token to look up in *text*.

		Returns:
			None. All results are printed or plotted.
		"""
		num_words = len(text)
		vocab_size = len(set(text))
		count_word = text.count(word)
		percent_word = percentage(count_word, num_words)
		word_label = str(word)

		print("Name of Text: " + text.name)
		print("Number of words in text: " + str(num_words))
		print("The size of vocabulary: " + str(vocab_size))
		print("Number of occurrence of the word " + word_label + " in the text: " + str(count_word))
		print("Percentage of occurrence of the word " + word_label + " in the text: " + str(percent_word))
		print("The concordance of the word " + word_label)
		# concordance() prints its matches itself and returns None, so it is
		# called directly; wrapping it in print() would emit a stray "None".
		text.concordance(word)
		text.dispersion_plot([word])


	def main():
		"""Run the section 1.1-1.3 demonstrations on the ``nltk.book`` sample data.

		Everything is written to stdout or shown as a plot; nothing is
		returned. Relies on the names brought in by
		``from nltk.book import *`` (``text1`` to ``text5``, ``sent1``,
		``sent3`` and ``FreqDist``) and on the helpers ``percentage``,
		``lexical_diversity`` and ``text_stats`` of this module.
		"""
		# 1.1 Expressions
		print("SECTION 1.1")
		print("Expressions")
		# Call form of print (not the Python 2 print statement) so these lines
		# match the rest of the file and also parse under Python 3.
		print(1 + 5 * 2 - 3)
		print(2 * 3 + 5 - 2 + (4 / 2))

		# 1.1 Concordance - occurrences of a word in the text along with context.
		# concordance(), similar() and common_contexts() print their own
		# results and return None, so they are called directly; wrapping them
		# in print() would emit a stray "None" after each listing.
		print("Concordance of a word in text")
		print("Concordance of the word young in text1 - " + text1.name)
		text1.concordance("young")
		print("Concordance of the word affection in text2 - " + text2.name)
		text2.concordance("affection")
		print("Concordance of the word lived in text3 - " + text3.name)
		text3.concordance("lived")

		# Concordance for the word "love", limited to 5 lines of width 50
		text1.concordance("love", width=50, lines=5)

		# 1.1 Words that occur in the same range of contexts as "extremely" in text2
		print("Words in same range of contexts as extremely in text2 - " + text2.name)
		text2.similar("extremely")

		# 1.1 Common contexts shared by the words "extremely" and "so" in text2
		print("Common contexts shared by words extremely and so in text2 - " + text2.name)
		text2.common_contexts(["extremely", "so"])

		# Lexical dispersion plot of the given words in text
		text2.dispersion_plot(['very', 'exceedingly', 'heartily', 'remarkably', 'monstrous', 'so'])
		text4.dispersion_plot(['liberty', 'constitution'])

		# Number and percentage of occurrence of the word 'lol' in text5
		count_lol = text5.count('lol')
		print("Number of occurrences of the word lol in text5: " + str(count_lol))
		# Reuse the shared helper and the count computed above.
		percent_lol = percentage(count_lol, len(text5))
		print("Percentage of occurrence of the word lol in text5: " + str(percent_lol))

		# Lexical diversity of text3 and text5
		print("Lexical Diversity of text3: " + str(lexical_diversity(text3)))
		print("Lexical Diversity of text5: " + str(lexical_diversity(text5)))

		# Percentage of occurrence of the word 'a' in text4
		percent_a = percentage(text4.count('a'), len(text4))
		print("Percentage of occurrence of the word a in text4: " + str(percent_a))

		# 1.2 Lists
		# Define a list ex1
		ex1 = [
			'Courage', 'is', 'not', 'the', 'absence', 'of', 'fear', 'but',
			'rather', 'the', 'judgement', 'that', 'something', 'else', 'is',
			'more', 'important', 'than', 'fear',
		]

		# Print the list, its sorted form and its length
		print("New List ex1: " + str(ex1))
		print("Sorted list of ex1: " + str(sorted(ex1)))
		print("Length of ex1: " + str(len(ex1)))

		# Print the size of the vocabulary of ex1
		print("Number of vocabulary items in ex1: " + str(len(set(ex1))))

		# Concatenate two lists
		print("Concatenating lists")
		print(
			['Courage', 'is', 'not', 'the', 'absence', 'of', 'fear', 'but', 'rather']
			+ ['the', 'judgement', 'that', 'something', 'else', 'is', 'more', 'important', 'than', 'fear']
		)
		print(sent1 + sent3)

		ex2 = ['What', 'Does', 'Not', 'Kill', 'Me', 'Makes', 'Me', 'Stronger']
		print("New list ex2: " + str(ex2))

		# Print the index of word 'Kill' in ex2
		print("Index of the word Kill in ex2: " + str(ex2.index('Kill')))

		# Print the item in index 3. (index starts from 0)
		print("Item in index 3 of ex2: " + str(ex2[3]))

		# Access sublists using slicing
		print("Access sublists using slicing: ")
		print(ex2[1:3])
		print(ex2[4:7])
		print(ex2[:3])
		print(ex2[3:])

		# Replace items in an index with another
		ex2[4] = 'you'
		ex2[6] = 'you'
		print("Replace some items in ex2: ")
		print(ex2)

		# Replace a sublist (slice) with another
		print("Replacing a sublist(slice) in ex2")
		ex2[1:3] = ['Doesnt']
		print(ex2)

		# 1.3
		# Frequency Distribution - frequency of each vocabulary item in the text
		fdist = FreqDist(text2)

		# Store the vocabulary items of text2 in variable vocab. list() makes
		# the result sliceable even when keys() returns a view instead of a list.
		# NOTE(review): the ordering of the keys presumably depends on the
		# installed NLTK version (frequency-sorted vs. insertion order) -
		# confirm if a frequency ranking is expected here.
		vocab = list(fdist.keys())

		# Print the first 30 vocabulary items
		print("First 30 vocabulary items of text2 - " + text2.name)
		print(vocab[:30])

		# Print the number of occurrences of word 'weakness'
		print("Number of occurrences of the word weakness in text2: " + str(fdist['weakness']))

		# Cumulative frequency plot of the top 50 high frequency words in the text
		fdist.plot(50, cumulative=True)

		# Print a sorted list of all the words greater than length 14 in text2
		vocabulary = set(text2)
		long_words = [token for token in vocabulary if len(token) > 14]
		print("Sorted list of all words greater than length 14 in text2 - " + text2.name)
		print(sorted(long_words))

		print("Function to print text stats: ")
		text_stats(text5, 'hello')

	# Run the demonstrations only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
		main()