# henziger/tre-bra.py

## tre-bra.py
from collections import Counter
import re

# Text file whose words are counted.
INPUT_FILE = "tre-bra.txt"

# Punctuation characters stripped from every word (compiled once, reused per word).
PUNCTUATION = re.compile(r'[(){}<>,.?!"]')

# Uninteresting words that are never counted. A frozenset gives O(1) membership
# tests and makes duplicate entries harmless.
REMOVALS = frozenset({
    "1", "2", "3", "4", "ut", "få", "blev", "inte", "höll", "och", "in", "massa", "sista", "den", "mot",
    "upp", "var", "par", "hade", "med", "på", "om", "i", "att", "en", "för", "av", "från", "ett",
    "till", "lite", "som", "det", "när", "är", "de", "ta", "sa", "så", "utan", "vad",
    "har", "tog", "min", "mina", "över", "ihop", "fick", "mycket", "ville", "många", "mig", "jag", "gjorde",
})

# A word is "popular" when it occurs MORE than this many times.
POPULARITY_THRESHOLD = 3

# Pseudonymize and style words, e.g. replace sensitive names with K-pop idols.
ALIASES = {"name1": "Yves",
           "name2": "Choerry",
           "name3": "Chuu",
           "name4": "Vivi",
           "eim": "EIM",
           "zelda": "Zelda"}


def read_words(path, encoding=None):
    """Return the whitespace-separated tokens of the text file at *path*.

    *encoding* is passed straight to ``open``; ``None`` keeps the platform
    default. The file is closed as soon as it has been read.
    """
    with open(path, "r", encoding=encoding) as text_file:
        return text_file.read().split()


def clean_words(raw_words):
    """Lower-case *raw_words*, strip punctuation and drop uninteresting words.

    Tokens that are empty after punctuation stripping (e.g. a lone "!") are
    dropped as well, so they can never be counted as a "word".
    """
    stripped = (PUNCTUATION.sub('', word.lower()) for word in raw_words)
    return [word for word in stripped if word and word not in REMOVALS]


def count_popular(words, threshold=POPULARITY_THRESHOLD):
    """Return ``{word: count}`` for the words occurring more than *threshold* times."""
    return {word: count for word, count in Counter(words).items() if count > threshold}


def apply_aliases(counts, aliases):
    """Return a copy of *counts* with every key found in *aliases* renamed.

    Keys of *aliases* that are absent from *counts* are skipped instead of
    raising ``KeyError``. If several names map to the same alias, their
    counts are added together.
    """
    renamed = dict(counts)
    for original, alias in aliases.items():
        if original not in renamed:
            continue
        # Pop first so that an alias identical to its key is not lost.
        count = renamed.pop(original)
        renamed[alias] = renamed.get(alias, 0) + count
    return renamed


def cloud_text(counts):
    """Return every word repeated ``count`` times, each preceded by one space."""
    return "".join((" " + word) * count for word, count in counts.items())


def main():
    """Print all the popular words so that they can be fed to the word cloud generator."""
    words = clean_words(read_words(INPUT_FILE))
    popular_words = apply_aliases(count_popular(words), ALIASES)
    print(cloud_text(popular_words))


# Only do the file I/O and printing when executed as a script.
if __name__ == "__main__":
    main()