pip3 install -U nltk
pip3 install -U numpy
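Tokenize the query (assumes `query` is already defined as a string, e.g. query = "where is my order i bought last saturday")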
$ python3
Python 3.6.2 (default, Jul 17 2017, 16:44:45)
[GCC 4.2.1 Compatible Apple LLVM 8.1.0 (clang-802.0.42)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import nltk
>>> nltk.download('punkt')
[nltk_data] Downloading package punkt to /Users/a1353612/nltk_data...
[nltk_data] Unzipping tokenizers/punkt.zip.
True
>>> query_tokens = nltk.word_tokenize(query)
>>> query_tokens
['where', 'is', 'my', 'order', 'i', 'bought', 'last', 'saturday']
Part-of-speech (POS) tagging: https://www.kaggle.com/nltkdata/averaged-perceptron-tagger
>>> nltk.download('averaged_perceptron_tagger')
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data] /Users/a1353612/nltk_data...
[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.
True
>>> perc_tagged = nltk.pos_tag(query_tokens)
>>> perc_tagged[0:6]
[('where', 'WRB'), ('is', 'VBZ'), ('my', 'PRP$'), ('order', 'NN'), ('i', 'NN'), ('bought', 'VBD')]
>>> nltk.download('maxent_ne_chunker')
>>> nltk.download('words')
>>> entities = nltk.chunk.ne_chunk(perc_tagged)
>>> entities
Tree('S', [('where', 'WRB'), ('is', 'VBZ'), ('my', 'PRP$'), ('order', 'NN'), ('i', 'NN'), ('bought', 'VBD'), ('last', 'JJ'), ('saturday', 'NN')])
>>> nltk.download('treebank')
>>> from nltk.corpus import treebank
>>> t = treebank.parsed_sents('wsj_0001.mrg')[0]
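>>> t.draw  # note: without parentheses this only echoes the bound method; call t.draw() to actually open the tree window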
<bound method Tree.draw of Tree('S', [Tree('NP-SBJ', [Tree('NP', [Tree('NNP', ['Pierre']), Tree('NNP', ['Vinken'])]), Tree(',', [',']), Tree('ADJP', [Tree('NP', [Tree('CD', ['61']), Tree('NNS', ['years'])]), Tree('JJ', ['old'])]), Tree(',', [','])]), Tree('VP', [Tree('MD', ['will']), Tree('VP', [Tree('VB', ['join']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['board'])]), Tree('PP-CLR', [Tree('IN', ['as']), Tree('NP', [Tree('DT', ['a']), Tree('JJ', ['nonexecutive']), Tree('NN', ['director'])])]), Tree('NP-TMP', [Tree('NNP', ['Nov.']), Tree('CD', ['29'])])])]), Tree('.', ['.'])])>
261578874264819908609102035485573088411 / 150
2 to the power of 128 (2**128 = 340282366920938463463374607431768211456)