# Skip to content

# Instantly share code, notes, and snippets.

# Created Oct 4, 2020
# What would you like to do?
import pandas as pd
import numpy as np
import textdistance
import re
from collections import Counter
# Load the corpus text and split it into word tokens.
# NOTE(review): the right-hand side of the `file_name_data` assignment was
# missing in the source; reading the whole file is the only form consistent
# with the re.findall call below. Confirm against the original script, which
# may also have normalised case before tokenising.
with open('moby.txt', 'r') as f:
    file_name_data = f.read()

# Each maximal run of word characters (\w) becomes one token. The pattern is a
# raw string so the backslash is not treated as a string escape.
words = re.findall(r'\w+', file_name_data)

# This is our vocabulary: the set of distinct tokens found in the text.
V = set(words)

print(f"The first ten words in the text are: \n{words[0:10]}")
print(f"There are {len(V)} unique words in the vocabulary.")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment