Created
October 12, 2021 18:23
-
-
Save galihboy/0e72e254d500888dc8bf50178e4bd956 to your computer and use it in GitHub Desktop.
menulis bilangan - clustering kalimat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
@author: galih-hermawan
"""
def PengelompokanKalimat(kalimat, *, satuan=None, angka=None):
    """Split a spelled-out number into groups that each end at a unit word.

    The sentence is split on whitespace and every word found in the unit
    dictionary is located.  Repeatedly, the unit with the largest value
    among the not-yet-consumed units is chosen, and all words from the
    current start position up to and including that unit form one group.

    Args:
        kalimat: The sentence to split.
        satuan: Optional mapping from unit word to a comparable magnitude.
            Defaults to the module-level ``kamusSatuan``.
        angka: Optional container of plain number words; only membership
            is tested.  Defaults to the module-level ``kamusAngka``.

    Returns:
        A list of strings.  If the sentence contains no unit word
        (including an empty or whitespace-only sentence), the list holds
        the sentence unchanged as its single element.
    """
    # Fall back to the module-level dictionaries when none are supplied.
    if satuan is None:
        satuan = kamusSatuan
    if angka is None:
        angka = kamusAngka

    lstKata = kalimat.split()

    # (magnitude, position) for every unit word, in sentence order.  Tuples
    # compare by magnitude first, then position, so on equal magnitudes the
    # later occurrence wins.
    lstCek = [
        (satuan[kata], posisi)
        for posisi, kata in enumerate(lstKata)
        if kata in satuan
    ]

    # No unit word at all (this also covers an empty sentence, which used to
    # raise IndexError): hand the sentence back untouched.
    if not lstCek:
        return [kalimat]

    lstSatuanProses = []
    idxAwal = 0
    while lstCek:
        # Take everything from the current start through the largest
        # remaining unit as one group.
        maks = max(lstCek)
        idx = maks[1]
        lstSatuanProses.append(" ".join(lstKata[idxAwal:idx + 1]))
        # The next group starts right after the unit just consumed.
        idxAwal = idx + 1
        # Drop the chosen unit and every unit to its left; they are all
        # part of the group that was just emitted.
        del lstCek[:lstCek.index(maks) + 1]

    # A plain number word (not a unit) at the very end of the sentence
    # becomes its own group.
    # NOTE(review): the original indentation was lost; this check is assumed
    # to belong to the "has units" branch -- confirm against the author's
    # version.
    kataTerakhir = lstKata[-1]
    if kataTerakhir in angka:
        lstSatuanProses.append(kataTerakhir)

    return lstSatuanProses
# Add the three dictionaries created earlier here:
#   kamusAngka
#   kamusSatuan
#   kamusAlias
# Also add the KalimatBaru function here.

# Example: pass the sentence through KalimatBaru, then group the result.
kalimat = KalimatBaru("dua ratus lima puluh")
print(PengelompokanKalimat(kalimat))  # output: ['dua ratus', 'lima puluh']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment