# naotokui/japanese_word_split.py

## japanese_word_split.py
import MeCab
# Module-level MeCab tagger, created once at import time and used by
# wakati_text_mecab() below. "-Ochasen" is the option string handed to MeCab
# (presumably selects ChaSen-style output formatting -- confirm against the
# MeCab documentation).
mt = MeCab.Tagger("-Ochasen")

def wakati_text_mecab(text):
    """Split Japanese text into space-separated surface forms using MeCab.

    Walks the node list produced by the module-level tagger ``mt`` and
    collects the surface of every node whose first feature field is not
    ``"BOS/EOS"``.

    Args:
        text: Text to segment. A value that is not a native ``str``
            (i.e. a Python 2 ``unicode``) is encoded to UTF-8 first; a
            native ``str`` is handed to MeCab unchanged.

    Returns:
        The collected surfaces joined with single spaces. If an exception
        is raised while walking the nodes, it is printed and the words
        gathered up to that point are returned (best effort).
    """
    # Only Python 2 ``unicode`` needs encoding; a native ``str`` is what the
    # binding takes on either major version.
    if not isinstance(text, str):
        text = text.encode("utf-8")

    # ``text`` deliberately stays bound for the whole walk: the nodes refer
    # back to the input buffer, and passing a temporary (e.g. an inline
    # ``.encode()`` result) has been reported to yield corrupted surfaces.
    node = mt.parseToNode(text)

    words = []
    try:
        while node:
            # The first comma-separated feature field is the part of speech;
            # "BOS/EOS" marks the sentence boundary sentinels.
            if node.feature.split(",")[0] != "BOS/EOS":
                words.append(node.surface)
            node = node.next
    except Exception as ex:
        # Best effort: report the problem and return what was collected.
        print(ex)

    return ' '.join(words)

# Example: segment a sample sentence and print the space-separated result.
wakati = wakati_text_mecab(u"原子番号９２のウランより重い元素は全て人工的に合成され、１１８番まで発見の報告がある.")
print(wakati)
# Output recorded by the author:
# 原子 番号 ９ ２ の ウラン より 重い 元素 は 全て 人工 的 に 合成 さ れ 、 １ １ ８ 番 まで 発見 の 報告 が ある 。
# NOTE(review): the input above ends with an ASCII '.', so the final token is
# expected to be '.' rather than '。' -- confirm against an actual run.
	import MeCab
	# Module-level MeCab tagger used by wakati_text_mecab() below.
	# "-Ochasen" is the option string handed to MeCab (presumably selects
	# ChaSen-style output formatting -- confirm against the MeCab docs).
	mt = MeCab.Tagger("-Ochasen")

	def wakati_text_mecab(text):
		"""Split Japanese text into space-separated surface forms using MeCab.

		Walks the node list produced by the tagger ``mt`` and collects the
		surface of every node whose first feature field is not ``"BOS/EOS"``.

		Args:
			text: Text to segment. A value that is not a native ``str``
				(i.e. a Python 2 ``unicode``) is encoded to UTF-8 first; a
				native ``str`` is handed to MeCab unchanged.

		Returns:
			The collected surfaces joined with single spaces. If an
			exception is raised while walking the nodes, it is printed and
			the words gathered up to that point are returned (best effort).
		"""
		# Only Python 2 ``unicode`` needs encoding; a native ``str`` is what
		# the binding takes on either major version.
		if not isinstance(text, str):
			text = text.encode("utf-8")

		# ``text`` deliberately stays bound for the whole walk: the nodes
		# refer back to the input buffer, and passing a temporary has been
		# reported to yield corrupted surfaces.
		node = mt.parseToNode(text)

		words = []
		try:
			while node:
				# "BOS/EOS" in the first feature field marks the sentence
				# boundary sentinels, which carry no word.
				if node.feature.split(",")[0] != "BOS/EOS":
					words.append(node.surface)
				node = node.next
		except Exception as ex:
			# Best effort: report the problem and return what was collected.
			print(ex)

		return ' '.join(words)

	# Example: segment a sample sentence and print the space-separated result.
	wakati = wakati_text_mecab(u"原子番号９２のウランより重い元素は全て人工的に合成され、１１８番まで発見の報告がある.")
	print(wakati)
	# Unsegmented sentence as recorded by the author, for comparison:
	# 原子番号９２のウランより重い元素は全て人工的に合成され、１１８番まで発見の報告がある。