Skip to content

Instantly share code, notes, and snippets.

@pebbie
Created March 25, 2013 11:23
Show Gist options
  • Save pebbie/5236492 to your computer and use it in GitHub Desktop.
Save pebbie/5236492 to your computer and use it in GitHub Desktop.
#
# Panduan Penggunaan pebahasa untuk POS Tagger
#
from hmmtagger import MainTagger
from tokenization import *
# Shared tagger instance; stays None until init_tag() creates the MainTagger.
mt = None
def init_tag():
    """Build the module-level tagger the first time this is called.

    The ``MainTagger`` instance is stored in the global ``mt``; when ``mt``
    is already set the call returns immediately without rebuilding it.
    The lexicon and n-gram files are given as relative paths, and the
    remaining positional arguments are forwarded unchanged to
    ``MainTagger`` (their meaning is defined by that class, not here).
    """
    global mt
    if mt is not None:
        # Already initialised by an earlier call; nothing to do.
        return
    mt = MainTagger(
        "resource/Lexicon.trn",
        "resource/Ngram.trn",
        0, 3, 3, 0, 0, False, 0.2, 0, 500.0, 1
    )
def do_tag(str_masukan):
    """Tag every sentence found in ``str_masukan``.

    The input is stripped and split on newline characters; empty lines are
    skipped. Each remaining line goes through ``cleaning()`` and
    ``sentence_extraction()``; every sentence returned is tokenized with
    ``tokenisasi_kalimat()``, the tokens are joined with single spaces, and
    the result is passed to ``mt.taggingStr()``.

    Args:
        str_masukan: Input text, one or more newline-separated lines.

    Returns:
        The tagged sentences, one per line, joined with newline characters.
        If any step of initialisation or tagging raises an ``Exception``,
        the literal string ``"Error Exception"`` is returned instead and
        the traceback is logged.
    """
    lines = str_masukan.strip().split("\n")
    tagged = []
    try:
        init_tag()
        for line in lines:
            if not line:
                continue
            # sentence_extraction, cleaning and tokenisasi_kalimat are
            # defined in tokenization.py
            for sentence in sentence_extraction(cleaning(line)):
                tokens = " ".join(tokenisasi_kalimat(sentence)).strip()
                tagged.append(" ".join(mt.taggingStr(tokens)))
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate. The sentinel
        # return value is kept for existing callers, but the cause is
        # logged instead of being silently discarded.
        import logging
        logging.getLogger(__name__).exception("do_tag failed")
        return "Error Exception"
    return "\n".join(tagged)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment