Skip to content

Instantly share code, notes, and snippets.

from collections import Counter
import re
# Read the whole text file and split it on whitespace into lower-cased words.
# The `with` block closes the file handle deterministically instead of
# leaving an unclosed handle for the garbage collector to reclaim.
# NOTE(review): no encoding is passed, so the platform default is used (same
# as before); the input presumably contains non-ASCII text -- consider an
# explicit encoding= once the file's actual encoding is confirmed.
with open("tre-bra.txt", "r") as text_file:
    words = [w.lower() for w in text_file.read().split()]
# Strip brackets and common punctuation characters from every word. A word
# made up solely of those characters becomes an empty string and is kept.
words = [re.sub(r'[(){}<>,.?!"]', '', w) for w in words]
# Remove uninteresting words
removals = ["1", "2", "3", "4", "ut", "få", "blev", "inte", "höll", "och", "in", "massa", "sista", "den", "mot",