-
-
Save amontalenti/7975313 to your computer and use it in GitHub Desktop.
>>> from nltk import word_tokenize
>>> from nltk.collocations import BigramCollocationFinder
>>> text = "obama says that obama says that the war is happening"
>>> finder = BigramCollocationFinder.from_words(word_tokenize(text))
>>> finder.items()[0:5]
[(('obama', 'says'), 2),
(('says', 'that'), 2),
(('is', 'happening'), 1),
(('that', 'obama'), 1),
(('that', 'the'), 1)]
Using nbest works. Ref: http://www.nltk.org/howto/collocations.html
>>> from nltk.collocations import BigramCollocationFinder
>>> bigram_measures = nltk.collocations.BigramAssocMeasures()
>>> text = "obama says that obama says that the war is happening"
>>> finder = BigramCollocationFinder.from_words(word_tokenize(text))
>>> finder.nbest(bigram_measures.pmi, 5)
[('is', 'happening'),
('the', 'war'),
('war', 'is'),
('obama', 'says'),
('says', 'that')]
for k,v in finder.ngram_fd.items():
    print(k,v)
runfile('/Users/mjalal/embeddings/glove/GloVe-1.2/most_common_bigram.py', wdir='/Users/mjalal/embeddings/glove/GloVe-1.2')
Traceback (most recent call last):
File "/Users/mjalal/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 1, in
runfile('/Users/mjalal/embeddings/glove/GloVe-1.2/most_common_bigram.py', wdir='/Users/mjalal/embeddings/glove/GloVe-1.2')
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/mjalal/embeddings/glove/GloVe-1.2/most_common_bigram.py", line 6, in
print(finder.items()[0:5])
AttributeError: 'BigramCollocationFinder' object has no attribute 'items'
Here, items is not an attribute of the class 'BigramCollocationFinder', so calling it raises an AttributeError.