Skip to content

Instantly share code, notes, and snippets.

@elyasha
Created October 31, 2020 15:20
Show Gist options
  • Save elyasha/cb3e3616136bb191ed37dbfd1510ce91 to your computer and use it in GitHub Desktop.
Save elyasha/cb3e3616136bb191ed37dbfd1510ce91 to your computer and use it in GitHub Desktop.
import nltk, re
from nltk.tokenize import word_tokenize
# importing ngrams module from nltk
from nltk.util import ngrams
from collections import Counter
from looking_glass import looking_glass_full_text
# Normalize the text: collapse every run of non-word characters into a single
# space, then lowercase. The pattern is a raw string so that `\W` reaches the
# regex engine verbatim instead of being parsed as an (invalid) string escape,
# which triggers a SyntaxWarning on Python 3.12+.
cleaned = re.sub(r'\W+', ' ', looking_glass_full_text).lower()
tokenized = word_tokenize(cleaned)

# NOTE: nltk.util.ngrams returns an iterator (per the NLTK docs), so each
# `looking_glass_*grams` name below is exhausted once Counter has consumed it;
# use the corresponding `*_frequency` Counter for any further work.

# Bigrams (n = 2): count how often each pair of adjacent tokens occurs.
looking_glass_bigrams = ngrams(tokenized, 2)
looking_glass_bigrams_frequency = Counter(looking_glass_bigrams)

# Trigrams (n = 3): count each run of three adjacent tokens.
looking_glass_trigrams = ngrams(tokenized, 3)
looking_glass_trigrams_frequency = Counter(looking_glass_trigrams)

# Longer n-grams (n = 8): count each run of eight adjacent tokens.
looking_glass_ngrams = ngrams(tokenized, 8)
looking_glass_ngrams_frequency = Counter(looking_glass_ngrams)

# Report the ten most frequent n-grams for each size.
print("Looking Glass Bigrams:")
print(looking_glass_bigrams_frequency.most_common(10))
print("\nLooking Glass Trigrams:")
print(looking_glass_trigrams_frequency.most_common(10))
print("\nLooking Glass n-grams:")
print(looking_glass_ngrams_frequency.most_common(10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment