# computingfreak/y.py

## y.py
# -*- coding: utf-8 -*-
from __future__ import print_function

from natto import MeCab

def tokenize(text):
    """Collect selected words from ``text`` using MeCab.

    MeCab is started with the node format ``%f[0],%f[6]``, so every node's
    ``feature`` string is "<feature field 0>,<feature field 6>".  With an
    IPADIC-style dictionary these are presumably the part of speech and the
    base form of the word -- TODO confirm against the installed dictionary.

    Only nodes that are not end-of-sentence and for which ``is_nor()`` is
    true are inspected.  The second field of a node is kept when its first
    field is one of the part-of-speech labels listed in ``wanted`` below.

    Returns:
        A list of the kept second-field strings, in parse order.
    """
    wanted = ('名詞', '形容詞', '形容動詞', '動詞')
    collected = []
    with MeCab('-F%f[0],%f[6]') as tagger:
        for node in tagger.parse(text, as_nodes=True):
            # Skip end-of-sentence nodes and anything that is not a normal node.
            if node.is_eos() or not node.is_nor():
                continue
            pos, base = node.feature.split(',', 1)
            if pos in wanted:
                collected.append(base)
    return collected

# Demo executed when the module runs: tokenize a fixed sample sentence and
# print each collected token on its own line.
tkns = tokenize('私の名前は太郎です。')
for tk in tkns:
    print(tk)
	# -- coding: utf-8 --
# NOTE(review): this section repeats the script above; its indentation had
# been flattened, so the block structure is restored here.
# print_function is already enabled by the __future__ import at the top of
# the file; a __future__ import is only legal there, so it is not repeated.

from natto import MeCab


def tokenize(text):
    """Collect selected words from ``text`` using MeCab.

    MeCab is started with the node format ``%f[0],%f[6]``, so every node's
    ``feature`` string is "<feature field 0>,<feature field 6>".  With an
    IPADIC-style dictionary these are presumably the part of speech and the
    base form of the word -- TODO confirm against the installed dictionary.

    Returns:
        A list with the second field of every normal, non end-of-sentence
        node whose first field is one of the labels checked below, in
        parse order.
    """
    tokens = []
    with MeCab('-F%f[0],%f[6]') as nm:
        for n in nm.parse(text, as_nodes=True):
            # ignore any end-of-sentence nodes
            if not n.is_eos() and n.is_nor():
                klass, word = n.feature.split(',', 1)
                if klass in ['名詞', '形容詞', '形容動詞', '動詞']:
                    tokens.append(word)
    return tokens


# Demo: tokenize a fixed sample sentence and print one token per line.
tkns = tokenize('私の名前は太郎です。')
for tk in tkns:
    print(tk)