# Matteo Mazzola chumpblocckami

## gist:f7e8e3c8b7f7c60689bbb1207ffe997c
import nltk
from nltk import *
import random
from nltk.corpus.reader import CategorizedPlaintextCorpusReader

# Root directory of the categorized plain-text corpus.
file = "dataset"

# Every file id ending in ".txt" under the root is treated as a document.
# The category label of a document is the first group that cat_pattern
# captures from its file id, i.e. the leading run of word characters
# (presumably the name of the sub-directory holding the file -- confirm
# against the actual layout of the dataset directory).
collCategorized = CategorizedPlaintextCorpusReader(
    file,
    r'.*\.txt',
    cat_pattern=r'(\w+)/*',
    encoding="utf8",
)

# One (tokens, category) pair per file, enumerated category by category.
documents = [
    (list(collCategorized.words(fileid)), category)
    for category in collCategorized.categories()
    for fileid in collCategorized.fileids(category)
]

# The list above is grouped by category; shuffle it in place so the
# documents come out in random order.
random.shuffle(documents)