Created
September 24, 2018 11:59
-
-
Save hgnj/b5009f7872792ee695db57f6ef324b19 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import treetaggerwrapper under the short alias "ttw".
import treetaggerwrapper as ttw

# Initial settings.
taglang = "en"  # language to tag: English
tagdir = "/Users/************/TreeTagger"  # directory holding the TreeTagger files
tagger = ttw.TreeTagger(TAGLANG=taglang, TAGDIR=tagdir)

# The sentence to process.
line = "He has lots of books, and I have as many books."

# Run TreeTagger on the sentence. Each returned entry is treated below as a
# tab-separated string whose third field is the base form.
tags = tagger.TagText(line)

# Store only the base forms in the list "ori_words".
# Entries that do not split into at least three fields carry no third field,
# so they are skipped instead of raising IndexError.
split_tags = (tag.split("\t") for tag in tags)
ori_words = [fields[2] for fields in split_tags if len(fields) > 2]

# If needed, sort alphabetically
# (adding "key=str.lower" sorts by the lowercased form):
# ori_words.sort(key=str.lower)

# Print the result to check it.
print(ori_words)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment