Last active
May 1, 2020 19:44
-
-
Save lewtds/426fcc9ad9e9a3b258ea91182ce19148 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% This file contains non-ASCII atoms (Vietnamese letters), so the encoding
% of the source text itself is declared with the encoding/1 directive.
% The `encoding` Prolog flag below is kept as well: it sets the default
% encoding for text files opened afterwards.
:- encoding(utf8).
:- set_prolog_flag(encoding, utf8).
% Needs SWI-Prolog version >= 8.1.13 for with_tty_raw | |
% | |
% Usage example: | |
% ?- consult('bogo.prolog'). | |
% ?- interactive. | |
% | |
% Then start typing. | |
% | |
% [t],t | |
% [t,r],tr | |
% [t,r,u],tru | |
% [t,r,u,o],truo | |
% [t,r,u,o,w],trươ | |
% [t,r,u,o,w,n],trươn | |
% [t,r,u,o,w,n,g],trương | |
% [t,r,u,o,w,n,g,f],trường | |
% [h],h | |
% [h,o],ho | |
% [h,o,p],hop | |
% [h,o,p,w],hơp | |
% [h,o,p,w,j],hợp | |
% interactive
%
% Entry point of the demo: runs the key-reading loop, starting from an
% empty key sequence, inside with_tty_raw/1.
interactive :-
    with_tty_raw(loop_process_key([])).

% Turn on the bogo:dau_moi flag. The terminal_vowel/3 rows guarded by this
% flag put the tone mark on the second letter (oà, oè, uỳ) and are listed
% before the unguarded rows that put it on the first letter (òa, òe, ùy).
:- set_flag(bogo:dau_moi, true).
% food for thought: | |
% gìn giữ | |
% giặt gịa | |
% giặt gyạ (archaic) | |
% giặt gỵa | |
% context sensitive grammar? | |
% loop_process_key(+Sequence)
%
% Read one character from the current input, append it to Sequence (the
% keys typed so far for the current word), convert the whole sequence and
% print the pair (Keys, Result). Then continue with the extended sequence.
%
%   - A space starts a new, empty sequence.
%   - end_of_file stops the loop. get_char/1 keeps returning end_of_file
%     once the input is exhausted, so without this check the loop would
%     append that atom to the sequence forever.
%
% The conversion is wrapped in once/1 because process_key_sequence/3 can
% also yield the raw key list on backtracking; committing to the first
% answer keeps the loop free of leftover choice points.
loop_process_key(Sequence) :-
    get_char(C),
    (   C == end_of_file
    ->  true
    ;   C == ' '
    ->  loop_process_key([])        % start a new sequence with space character
    ;   append(Sequence, [C], Sequence2),
        once(process_key_sequence(Sequence2, [], OutCharList)),
        atom_chars(Atom, OutCharList),
        writeln((Sequence2, Atom)),
        loop_process_key(Sequence2)
    ).
% TODO: Upper case support | |
% TELEX | |
% key_effect(?Key, ?Effect)
%
% Table of the effects a typed key may have on the current syllable.
% Effect is one of add_tone(Tone), add_vowel_mod(Mod) or
% add_consonant_mod(Mod). A key can have several candidate effects
% (e.g. `w` and '7'); the candidates are tried in the order listed here.
%
% TELEX bindings
key_effect(f, add_tone(tone_huyen)). | |
key_effect(s, add_tone(tone_sac)). | |
key_effect(r, add_tone(tone_hoi)). | |
key_effect(x, add_tone(tone_nga)). | |
key_effect(j, add_tone(tone_nang)). | |
key_effect(a, add_vowel_mod(mod_hat_a)). | |
key_effect(e, add_vowel_mod(mod_hat_e)). | |
key_effect(o, add_vowel_mod(mod_hat_o)). | |
key_effect(w, add_vowel_mod(mod_horn_uo)). | |
key_effect(w, add_vowel_mod(mod_horn_o)). | |
key_effect(w, add_vowel_mod(mod_horn_u)). | |
key_effect(w, add_vowel_mod(mod_breve_a)). | |
key_effect(d, add_consonant_mod(mod_dash_d)). | |
% VNI bindings (digit keys; the digits are quoted so that they are atoms)
key_effect('1', add_tone(tone_sac)). | |
key_effect('2', add_tone(tone_huyen)). | |
key_effect('3', add_tone(tone_hoi)). | |
key_effect('4', add_tone(tone_nga)). | |
key_effect('5', add_tone(tone_nang)). | |
key_effect('6', add_vowel_mod(mod_hat_a)). | |
key_effect('6', add_vowel_mod(mod_hat_e)). | |
key_effect('6', add_vowel_mod(mod_hat_o)). | |
key_effect('7', add_vowel_mod(mod_horn_uo)). | |
key_effect('7', add_vowel_mod(mod_horn_o)). | |
key_effect('7', add_vowel_mod(mod_horn_u)). | |
key_effect('8', add_vowel_mod(mod_breve_a)). | |
key_effect('9', add_consonant_mod(mod_dash_d)). | |
% Usage example: | |
% | |
% ?- process_key_sequence([b, a, n, s, w], [] , Output). | |
% Output = [b, ắ, n]. | |
% | |
% process_atom(+AtomIn, -OutList)
%
% Convenience wrapper: split the atom AtomIn into its characters, treat
% them as a key sequence and convert it, starting from an empty string.
% OutList is the resulting list of characters.
process_atom(AtomIn, OutList) :-
    atom_chars(AtomIn, Keys),
    process_key_sequence(Keys, [], OutList).
% process_key_sequence(+Sequence, -Output)
%
% Convert a list of typed keys into a list of output characters, starting
% from an empty string.
process_key_sequence(Sequence, Output) :-
    process_key_sequence(Sequence, [], Output).

% process_key_sequence(+Keys, +CurrentString, -Output)
%
% Feed the keys one at a time into process_key/3, threading the string
% built so far through the recursion. Only the first answer of
% process_key/3 is used for each key.
process_key_sequence([Key|Remaining], CurrentString, Output) :-
    once(process_key(CurrentString, Key, NextString)),
    process_key_sequence(Remaining, NextString, Output).
% No keys left: the string built so far is the result.
process_key_sequence([], Finished, Output) :-
    Output = Finished.
% Fallback for a conversion that started from the empty string: return the
% raw keys unchanged (e.g. vowel stays vowel instead of becoming vơel).
% This clause is also reachable on backtracking after a successful
% conversion, so callers that want a single answer should use once/1.
process_key_sequence(Keys, [], Keys).
% Try applying the effect of the key, assuming that there is one. | |
% Performance optimization idea: give out both an output string and a syllable term and reuse the term for the next cycle. | |
% process_key(+CurrentString, +Key, -OutString)
%
% Apply one key to the character list CurrentString, giving OutString.
% Fails when no candidate effect of the key leads to a parsable syllable.
process_key(CurrentString, Key, OutString) :-
    % split into initial consonant, vowel nucleus and final consonant,
    % keeping only the first parse
    once(phrase(syllable(Initial, Nucleus, Final), CurrentString)),
    Parsed = s(Initial, Nucleus, Final),
    apply_key_effect(Parsed, Key, Modified),
    rebalance_incomplete_form(Modified, Balanced),
    syllable_atom(Balanced, Text),
    atom_chars(Text, OutString),
    % the result itself must parse as a syllable; this rejects
    % non-Vietnamese outcomes such as vowel -> vơel
    once(phrase(syllable(_, _, _), OutString)).
% syllable_atom(+Syllable, -Atom)
%
% Concatenate the three parts of an s(Initial, Nucleus, Final) term into
% one atom.
syllable_atom(s(Initial, Nucleus, Final), Atom) :-
    Parts = [Initial, Nucleus, Final],
    atomic_list_concat(Parts, Atom).
% apply_key_effect(+InSyllable, +Key, -OutSyllable)
%
% Enumerate the possible results of pressing Key on the syllable
% InSyllable = s(Initial, Nucleus, Final). The clauses are tried in order:
% first the transformations registered in key_effect/2 (tone, vowel
% modification, consonant modification), then the plain "append the key"
% fallbacks. The caller picks the first candidate that survives its own
% validity check.
apply_key_effect(InSyllable, Key, OutSyllable) :-
    key_effect(Key, add_tone(Tone)),
    syllable_tone(InSyllable, Tone, OutSyllable).
apply_key_effect(InSyllable, Key, OutSyllable) :-
    key_effect(Key, add_vowel_mod(Mod)),
    syllable_vowel_mod(InSyllable, Mod, OutSyllable).
apply_key_effect(InSyllable, Key, OutSyllable) :-
    key_effect(Key, add_consonant_mod(Mod)),
    syllable_consonant_mod(InSyllable, Mod, OutSyllable).
% If none of the transformation rules apply, append the key to the end of the syllable.
% A vowel key extends the nucleus while there is no final consonant yet.
% The grammar rule vowel_char//1 is invoked through phrase/2 rather than by
% calling the translated predicate with hand-written list arguments.
apply_key_effect(s(I, V, ''), Key, s(I, V2, '')) :-
    phrase(vowel_char(Key), [Key]),
    atom_concat(V, Key, V2).
% With no nucleus and no final, the key extends the initial consonant.
apply_key_effect(s(I, '', ''), Key, s(I2, '', '')) :-
    atom_concat(I, Key, I2).
% Otherwise the key extends the final consonant.
apply_key_effect(s(I, V, F), Key, s(I, V, F2)) :-
    atom_concat(F, Key, F2).
% syllable_vowel_mod(+Syllable, +Mod, -OutSyllable)
%
% Replace the vowel modification of the nucleus by Mod while keeping its
% tone: look the nucleus up to find its base form and tone, then look up
% the form of that base with the requested Mod and the same tone.
syllable_vowel_mod(s(Initial, Nucleus, Final), Mod, s(Initial, NewNucleus, Final)) :-
    vowel_nucleus_mod_tone(s(Initial, Nucleus, Final),
                           s(Initial, Base, Final), _, Tone),
    vowel_nucleus_mod_tone(s(Initial, NewNucleus, Final),
                           s(Initial, Base, Final), Mod, Tone).
% syllable_tone(+Syllable, +Tone, -OutSyllable)
%
% Replace the tone of the nucleus by Tone while keeping its vowel
% modification: look the nucleus up to find its base form and Mod, then
% look up the form of that base with the same Mod and the requested tone.
syllable_tone(s(Initial, Nucleus, Final), Tone, s(Initial, NewNucleus, Final)) :-
    vowel_nucleus_mod_tone(s(Initial, Nucleus, Final),
                           s(Initial, Base, Final), Mod, _),
    vowel_nucleus_mod_tone(s(Initial, NewNucleus, Final),
                           s(Initial, Base, Final), Mod, Tone).
% Eg: to turn thuơng into thương, mịen to miẹn | |
% rebalance_incomplete_form(+Syllable, -OutSyllable)
%
% Normalise the nucleus: find its base form, Mod and tone, then regenerate
% the nucleus from exactly that triple. Because the tables are searched in
% clause order, the regenerated spelling is the first one listed for the
% triple (e.g. thuơng becomes thương, mịen becomes miẹn).
rebalance_incomplete_form(s(Initial, Nucleus, Final), s(Initial, NewNucleus, Final)) :-
    vowel_nucleus_mod_tone(s(Initial, Nucleus, Final),
                           s(Initial, Base, Final), Mod, Tone),
    vowel_nucleus_mod_tone(s(Initial, NewNucleus, Final),
                           s(Initial, Base, Final), Mod, Tone).
% d -> đ is the only consonant modification, so it is hard-coded
% syllable_consonant_mod(+Syllable, +Mod, -OutSyllable)
%
% Turn the initial consonant d into đ. The Mod argument is ignored, and
% the predicate fails for any other initial consonant.
syllable_consonant_mod(s(d, Nucleus, Final), _Mod, s(đ, Nucleus, Final)).
%%%%%%%%%%%%% | |
% Parser section | |
%%%%%%%%%%%%% | |
% syllable(?Initial, ?Nucleus, ?Final)//
%
% A syllable is an initial consonant, a vowel nucleus and a final
% consonant, in that order. Each part is an atom and may be ''.
syllable(Initial, Nucleus, Final) -->
    consonant_initial(Initial),
    vowel(Nucleus),
    consonant_final(Final).

% vowel(?Nucleus)//
%
% A run of vowel characters, returned as a single atom.
vowel(Nucleus) -->
    vowel_chars(Chars),
    { atom_chars(Nucleus, Chars) }.

% vowel_chars(?Chars)//
%
% Zero or more vowel characters. The recursive clause comes first, so the
% longest run is proposed first and shorter ones only on backtracking.
vowel_chars([Char|Chars]) -->
    vowel_char(Char),
    vowel_chars(Chars).
vowel_chars([]) -->
    [].
% vowel_char(?Char)//
%
% Consume one input element and succeed if it is one of the Vietnamese
% vowel letters listed below (base letters and their toned or modified
% variants).
vowel_char(Char) -->
    [Char],
    { atom_chars(àáảãạaằắẳẵặăầấẩẫậâèéẻẽẹeềếểễệêìíỉĩịiòóỏõọoồốổỗộôờớởỡợơùúủũụuừứửữựưỳýỷỹỵy, Vowels),
      member(Char, Vowels)
    }.
% consonant_initial(?Initial)//
%
% Initial consonant of a syllable, returned as an atom ('' when there is
% none). The clauses are tried in the order written, so for input that
% several clauses can match (e.g. n / nh / ng / ngh) the earlier clause is
% proposed first and the later ones only on backtracking.
consonant_initial('') --> []. | |
consonant_initial(m) --> [m]. | |
consonant_initial(n) --> [n]. | |
consonant_initial(nh) --> [n, h]. | |
consonant_initial(ng) --> [n, g]. | |
consonant_initial(ngh) --> [n, g, h]. | |
consonant_initial(p) --> [p]. | |
consonant_initial(t) --> [t]. | |
consonant_initial(tr) --> [t, r]. | |
consonant_initial(ch) --> [c, h]. | |
consonant_initial(c) --> [c]. | |
consonant_initial(k) --> [k]. | |
consonant_initial(qu) --> [q, u]. | |
consonant_initial(q) --> [q]. % I'm a bit iffy on this case | |
consonant_initial(ph) --> [p, h]. | |
consonant_initial(th) --> [t, h]. | |
consonant_initial(kh) --> [k, h]. | |
consonant_initial(b) --> [b]. | |
consonant_initial(đ) --> [đ]. | |
consonant_initial(s) --> [s]. | |
consonant_initial(x) --> [x]. | |
consonant_initial(h) --> [h]. | |
consonant_initial(d) --> [d]. | |
% Pushback rule: after matching g, i and a vowel nucleus V, the one-element
% list [V] is put back in front of the remaining input.
% NOTE(review): this looks like a lookahead so that gi is chosen as the
% initial only when a vowel follows. V is pushed back as a single atom, not
% as its characters, so this presumably relies on backtracking into vowel//1
% to reach a one-character V — confirm.
consonant_initial(gi), [V] --> [g, i], vowel(V). | |
consonant_initial(g) --> [g]. | |
consonant_initial(gh) --> [g, h]. | |
consonant_initial(v) --> [v]. | |
consonant_initial(l) --> [l]. | |
consonant_initial(r) --> [r]. | |
% Non-standard initials dz and z, accepted for spellings such as dzũng / zũng
consonant_initial(dz) --> [d, z]. | |
consonant_initial(z) --> [z]. | |
% consonant_final(?Final)//
%
% Final consonant of a syllable, returned as an atom ('' when there is
% none). Each final is listed exactly once; a second, identical clause for
% n used to make every parse of a syllable ending in n succeed twice.
consonant_final('') --> [].
consonant_final(n) --> [n].
consonant_final(nh) --> [n, h].
consonant_final(ng) --> [n, g].
consonant_final(c) --> [c].
consonant_final(ch) --> [c, h].
consonant_final(p) --> [p].
consonant_final(t) --> [t].
consonant_final(m) --> [m].
% vowel nucleus transformation table schema:
% resulting syllable, base syllable, vowel mod, tone
% tone_slot(?Tone, ?Forms, ?Form)
%
% Forms is a six-element tuple of spellings; Form is the element that
% belongs to Tone. Slot order: ngang, huyen, sac, hoi, nga, nang.
tone_slot(tone_ngang, (Form, _, _, _, _, _), Form).
tone_slot(tone_huyen, (_, Form, _, _, _, _), Form).
tone_slot(tone_sac,   (_, _, Form, _, _, _), Form).
tone_slot(tone_hoi,   (_, _, _, Form, _, _), Form).
tone_slot(tone_nga,   (_, _, _, _, Form, _), Form).
tone_slot(tone_nang,  (_, _, _, _, _, Form), Form).

% vowel_nucleus_mod_tone(?Syllable, ?BaseSyllable, ?Mod, ?Tone)
%
% Relates a syllable to its base syllable (same initial and final, nucleus
% without marks), the vowel modification and the tone. It works in both
% directions: analysing a nucleus or generating one.
%
% Syllables without a final consonant are looked up in terminal_vowel/3.
vowel_nucleus_mod_tone(s(I, Result, ''), s(I, Raw, ''), Mod, Tone) :-
    tone_slot(Tone, Forms, Result),
    terminal_vowel(Raw, Mod, Forms).
% Any syllable, with or without a final, is looked up in
% not_strictly_terminal_vowel/3.
vowel_nucleus_mod_tone(s(I, Result, F), s(I, Raw, F), Mod, Tone) :-
    tone_slot(Tone, Forms, Result),
    not_strictly_terminal_vowel(Raw, Mod, Forms).
% Catch-all: every syllable is its own base with no modification and the
% ngang tone.
vowel_nucleus_mod_tone(s(I, V, F), s(I, V, F), mod_none, tone_ngang).
% not_strictly_terminal_vowel(?Raw, ?Mod, ?Forms)
%
% Table of vowel nuclei consulted by the vowel_nucleus_mod_tone/4 clauses
% that accept any final consonant. Raw is the unmarked nucleus, Mod the
% vowel modification, and Forms the six spellings in the tone order
% ngang, huyen, sac, hoi, nga, nang.
%
% (uo, mod_horn_uo) has two rows, ươ and uơ. The ươ row comes first, so it
% is the spelling produced when a nucleus is generated from that key; both
% spellings are recognised when a nucleus is analysed.
not_strictly_terminal_vowel(a, mod_breve_a, (ă, ằ, ắ, ẳ, ẵ, ặ)). | |
not_strictly_terminal_vowel(a, mod_hat_a, (â, ầ, ấ, ẩ, ẫ, ậ)). | |
not_strictly_terminal_vowel(a, mod_none, (a, à, á, ả, ã, ạ)). | |
not_strictly_terminal_vowel(e, mod_hat_e, (ê, ề, ế, ể, ễ, ệ)). | |
not_strictly_terminal_vowel(e, mod_none, (e, è, é, ẻ, ẽ, ẹ)). | |
not_strictly_terminal_vowel(i, mod_none, (i, ì, í, ỉ, ĩ, ị)). | |
not_strictly_terminal_vowel(ie, mod_hat_e, (iê, iề, iế, iể, iễ, iệ)). | |
not_strictly_terminal_vowel(o, mod_hat_o, (ô, ồ, ố, ổ, ỗ, ộ)). | |
not_strictly_terminal_vowel(o, mod_horn_o, (ơ, ờ, ớ, ở, ỡ, ợ)). | |
not_strictly_terminal_vowel(o, mod_none, (o, ò, ó, ỏ, õ, ọ)). | |
not_strictly_terminal_vowel(oa, mod_breve_a, (oă, oằ, oắ, oẳ, oẵ, oặ)). | |
not_strictly_terminal_vowel(oa, mod_none, (oa, oà, oá, oả, oã, oạ)). | |
not_strictly_terminal_vowel(u, mod_horn_u, (ư, ừ, ứ, ử, ữ, ự)). | |
not_strictly_terminal_vowel(u, mod_none, (u, ù, ú, ủ, ũ, ụ)). | |
not_strictly_terminal_vowel(ue, mod_hat_e, (uê, uề, uế, uể, uễ, uệ)). | |
not_strictly_terminal_vowel(ua, mod_hat_a, (uâ, uầ, uấ, uẩ, uẫ, uậ)). | |
not_strictly_terminal_vowel(uo, mod_hat_o, (uô, uồ, uố, uổ, uỗ, uộ)). | |
not_strictly_terminal_vowel(uo, mod_horn_uo, (ươ, ườ, ướ, ưở, ưỡ, ượ)). | |
not_strictly_terminal_vowel(uo, mod_horn_uo, (uơ, uờ, uớ, uở, uỡ, uợ)). | |
not_strictly_terminal_vowel(uye, mod_hat_e, (uyê, uyề, uyế, uyể, uyễ, uyệ)). | |
% The incomplete forms occur as transitional states before the final word. For example, if you type huyenf you get
% huyèn, an arguably invalid Vietnamese word, but if you type 'e' after that, you get huyền, a valid word.
% We don't do anything with the incomplete forms yet, but it's a good distinction to make, at least at the mental level.
% Last clause of not_strictly_terminal_vowel/3: after the regular rows,
% fall through to the table of incomplete forms below.
not_strictly_terminal_vowel(A, B, C) :- not_strictly_terminal_vowel_incomplete(A, B, C). | |
% not_strictly_terminal_vowel_incomplete(?Raw, ?Mod, ?Forms)
%
% Same columns as not_strictly_terminal_vowel/3 (six spellings in the tone
% order ngang, huyen, sac, hoi, nga, nang). Several keys have two rows that
% differ only in which letter carries the tone mark; the row listed first
% is the one produced when a nucleus is generated.
not_strictly_terminal_vowel_incomplete(uo, mod_none, (uo, uò, uó, uỏ, uõ, uọ)). | |
not_strictly_terminal_vowel_incomplete(uo, mod_horn_uo, (ưo, ừo, ứo, ửo, ữo, ựo)). | |
not_strictly_terminal_vowel_incomplete(uo, mod_horn_uo, (ưo, ưò, ưó, ưỏ, ưõ, ưọ)). | |
not_strictly_terminal_vowel_incomplete(uye, mod_none, (uye, uyè, uyé, uyẻ, uyẽ, uyẹ)). | |
not_strictly_terminal_vowel_incomplete(ie, mod_none, (ie, iè, ié, iẻ, iẽ, iẹ)). | |
not_strictly_terminal_vowel_incomplete(ie, mod_none, (ie, ìe, íe, ỉe, ĩe, ịe)). | |
not_strictly_terminal_vowel_incomplete(ue, mod_none, (ue, uè, ué, uẻ, uẽ, uẹ)). | |
% terminal_vowel(?Raw, ?Mod, ?Forms)
%
% Table of vowel nuclei consulted by the vowel_nucleus_mod_tone/4 clauses
% for syllables whose final consonant is ''. Raw is the unmarked nucleus,
% Mod the vowel modification, and Forms the six spellings in the tone
% order ngang, huyen, sac, hoi, nga, nang.
%
% For oa, oe and uy there are two rows each. The first is only available
% while the flag bogo:dau_moi is true and puts the tone mark on the second
% letter; the second is always available and puts it on the first letter.
terminal_vowel(ai, mod_none, (ai, ài, ái, ải, ãi, ại)). | |
terminal_vowel(ao, mod_none, (ao, ào, áo, ảo, ão, ạo)). | |
terminal_vowel(au, mod_hat_a, (âu, ầu, ấu, ẩu, ẫu, ậu)). | |
terminal_vowel(au, mod_none, (au, àu, áu, ảu, ãu, ạu)). | |
terminal_vowel(ay, mod_hat_a, (ây, ầy, ấy, ẩy, ẫy, ậy)). | |
terminal_vowel(ay, mod_none, (ay, ày, áy, ảy, ãy, ạy)). | |
terminal_vowel(eo, mod_none, (eo, èo, éo, ẻo, ẽo, ẹo)). | |
terminal_vowel(eu, mod_hat_e, (êu, ều, ếu, ểu, ễu, ệu)). | |
terminal_vowel(ia, mod_none, (ia, ìa, ía, ỉa, ĩa, ịa)). | |
terminal_vowel(ie, mod_none, (ie, ìe, íe, ỉe, ĩe, ịe)). | |
terminal_vowel(ieu, mod_hat_e, (iêu, iều, iếu, iểu, iễu, iệu)). | |
terminal_vowel(iu, mod_none, (iu, ìu, íu, ỉu, ĩu, ịu)). | |
terminal_vowel(oa, mod_none, (oa, oà, oá, oả, oã, oạ)) :- get_flag(bogo:dau_moi, true). | |
terminal_vowel(oa, mod_none, (oa, òa, óa, ỏa, õa, ọa)). | |
terminal_vowel(oe, mod_none, (oe, oè, oé, oẻ, oẽ, oẹ)) :- get_flag(bogo:dau_moi, true). | |
terminal_vowel(oe, mod_none, (oe, òe, óe, ỏe, õe, ọe)). | |
terminal_vowel(oi, mod_none, (oi, òi, ói, ỏi, õi, ọi)). | |
terminal_vowel(ua, mod_none, (ua, ùa, úa, ủa, ũa, ụa)). | |
terminal_vowel(ui, mod_horn_u, (ưi, ừi, ứi, ửi, ữi, ựi)). | |
terminal_vowel(ui, mod_none, (ui, ùi, úi, ủi, ũi, ụi)). | |
terminal_vowel(uo, mod_horn_uo, (uơ, uờ, uớ, uở, uỡ, uợ)). | |
terminal_vowel(uoi, mod_hat_o, (uôi, uồi, uối, uổi, uỗi, uội)). | |
terminal_vowel(uoi, mod_horn_uo, (ươi, ười, ưới, ưởi, ưỡi, ượi)). | |
terminal_vowel(uou, mod_horn_uo, (ươu, ườu, ướu, ưởu, ưỡu, ượu)). | |
terminal_vowel(uu, mod_horn_u, (ưu, ừu, ứu, ửu, ữu, ựu)). | |
terminal_vowel(uy, mod_none, (uy, uỳ, uý, uỷ, uỹ, uỵ)) :- get_flag(bogo:dau_moi, true). | |
terminal_vowel(uy, mod_none, (uy, ùy, úy, ủy, ũy, ụy)). | |
terminal_vowel(y, mod_none, (y, ỳ, ý, ỷ, ỹ, ỵ)). | |
terminal_vowel(ya, mod_none, (ya, ỳa, ýa, ỷa, ỹa, ỵa)). | |
% Last clause: after the regular rows, fall through to the incomplete forms.
terminal_vowel(A, B, C) :- terminal_vowel_incomplete(A, B, C). | |
% terminal_vowel_incomplete(?Raw, ?Mod, ?Forms)
%
% Transitional nuclei without a final consonant that are reached while a
% word is still being typed. Forms holds the six spellings in the tone
% order ngang, huyen, sac, hoi, nga, nang. The uoi row is listed once; an
% exact copy of it used to produce every uoi answer twice.
terminal_vowel_incomplete(uoi, mod_none, (uoi, uòi, uói, uỏi, uõi, uọi)).
terminal_vowel_incomplete(uou, mod_none, (uou, uòu, uóu, uỏu, uõu, uọu)).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <SWI-Prolog.h> | |
#define MAXLINE 1024 | |
int | |
main(int argc, char **argv) | |
{ char expression[MAXLINE]; | |
char *e = expression; | |
char *program = argv[0]; | |
char *plav[2]; | |
int n; | |
/* combine all the arguments in a single string */ | |
for(n=1; n<argc; n++) | |
{ if ( n != 1 ) | |
*e++ = ' '; | |
strcpy(e, argv[n]); | |
e += strlen(e); | |
} | |
/* make the argument vector for Prolog */ | |
plav[0] = program; | |
plav[1] = NULL; | |
/* initialise Prolog */ | |
if ( !PL_initialise(1, plav) ) | |
PL_halt(1); | |
/* Lookup calc/1 and make the arguments and call */ | |
{ predicate_t pred = PL_predicate("process_nice", 2, "user"); | |
term_t h0 = PL_new_term_refs(2); | |
term_t h1 = h0 + 1; | |
int rval; | |
PL_put_atom_chars(h0, "ddoongj"); | |
rval = PL_call_predicate(NULL, PL_Q_NORMAL, pred, h0); | |
char* output; | |
size_t len; | |
int success = PL_get_chars(h1, &output, CVT_LIST | REP_UTF8); | |
if (success) { | |
printf("%s\n", output); | |
} else { | |
printf("failed\n"); | |
} | |
// for (int i = 0; i < len; i++) { | |
// printf("%02X %d", (unsigned char) output[i], PL_is_atom(h1)); | |
// } | |
// printf('\n'); | |
PL_halt(rval ? 0 : 1); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the `test` executable by linking test.c with bogo.pl via swipl-ld.
# Recipe lines must start with a TAB character.
.PHONY: clean

test: bogo.pl test.c
	swipl-ld -goal true -o test test.c bogo.pl

# Remove the build product.
clean:
	rm -rf test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
error: git-cola died of signal 15 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <SWI-Prolog.h> | |
#define MAXLINE 1024 | |
int | |
main(int argc, char **argv) | |
{ char expression[MAXLINE]; | |
char *e = expression; | |
char *program = argv[0]; | |
char *plav[2]; | |
int n; | |
/* combine all the arguments in a single string */ | |
for(n=1; n<argc; n++) | |
{ if ( n != 1 ) | |
*e++ = ' '; | |
strcpy(e, argv[n]); | |
e += strlen(e); | |
} | |
/* make the argument vector for Prolog */ | |
plav[0] = program; | |
plav[1] = NULL; | |
/* initialise Prolog */ | |
if ( !PL_initialise(1, plav) ) | |
PL_halt(1); | |
/* Lookup calc/1 and make the arguments and call */ | |
{ predicate_t pred = PL_predicate("process_nice", 2, "user"); | |
term_t h0 = PL_new_term_refs(2); | |
term_t h1 = h0 + 1; | |
int rval; | |
PL_put_atom_chars(h0, "ddoongj"); | |
rval = PL_call_predicate(NULL, PL_Q_NORMAL, pred, h0); | |
char* output; | |
size_t len; | |
int success = PL_get_chars(h1, &output, CVT_LIST | REP_UTF8); | |
if (success) { | |
printf("%s\n", output); | |
} else { | |
printf("failed\n"); | |
} | |
// for (int i = 0; i < len; i++) { | |
// printf("%02X %d", (unsigned char) output[i], PL_is_atom(h1)); | |
// } | |
// printf('\n'); | |
PL_halt(rval ? 0 : 1); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment