# kshepp/nltk_parts_of_speech.py

## nltk_parts_of_speech.py
from __future__ import division
import nltk, re
from nltk import FreqDist
from nltk import word_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import brown

# Script: list the 50 most frequent nouns/adjectives in Conservative.txt.
import io  # io.open decodes while reading and works on Python 2 and 3

# Penn Treebank tags to keep.  nltk.pos_tag() emits Penn Treebank tags by
# default, where a singular common noun is 'NN' and an adjective is 'JJ'
# ('ADJ' belongs to the universal tagset and would never match here).
# Change or extend this tuple (e.g. 'NNS', 'NNP', 'JJR') to select other
# parts of speech.
WANTED_TAGS = ('NN', 'JJ')

# Word-character tokenizer (drops punctuation).  NOTE: it is not used below;
# tokenising is done by word_tokenize().  Kept so the module-level name
# remains available.
tokenizer = RegexpTokenizer(r'\w+')

# Read the whole corpus as unicode text; the context manager guarantees the
# file handle is closed.
with io.open('Conservative.txt', encoding='utf-8') as corpus_file:
    f = corpus_file.read()
text = word_tokenize(f)

# Keep every occurrence (a list, not a set): de-duplicating first would make
# every frequency below collapse to ~1.
nouns = [word for word, pos in nltk.pos_tag(text) if pos in WANTED_TAGS]

# Case-insensitive frequency distribution of the selected words.
freqs = nltk.FreqDist(w.lower() for w in nouns)

# Single-argument print(...) behaves identically on Python 2 and 3.
print(freqs.most_common(50))
	from __future__ import division
	import nltk, re
	from nltk import FreqDist
	from nltk import word_tokenize
	from nltk.tokenize import RegexpTokenizer
	from nltk.corpus import brown

	# Script: list the 50 most frequent nouns/adjectives in Conservative.txt.
	import io  # io.open decodes while reading and works on Python 2 and 3

	# Penn Treebank tags to keep.  nltk.pos_tag() emits Penn Treebank tags by
	# default, where a singular common noun is 'NN' and an adjective is 'JJ'
	# ('ADJ' belongs to the universal tagset and would never match here).
	# Change or extend this tuple (e.g. 'NNS', 'NNP', 'JJR') to select other
	# parts of speech.
	WANTED_TAGS = ('NN', 'JJ')

	# Word-character tokenizer (drops punctuation).  NOTE: it is not used below;
	# tokenising is done by word_tokenize().  Kept so the name remains available.
	tokenizer = RegexpTokenizer(r'\w+')

	# Read the whole corpus as unicode text; the context manager guarantees the
	# file handle is closed.
	with io.open('Conservative.txt', encoding='utf-8') as corpus_file:
		f = corpus_file.read()
	text = word_tokenize(f)

	# Keep every occurrence (a list, not a set): de-duplicating first would make
	# every frequency below collapse to ~1.
	nouns = [word for word, pos in nltk.pos_tag(text) if pos in WANTED_TAGS]

	# Case-insensitive frequency distribution of the selected words.
	freqs = nltk.FreqDist(w.lower() for w in nouns)

	# Single-argument print(...) behaves identically on Python 2 and 3.
	print(freqs.most_common(50))