Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
UILにUTF-8を無理矢理通すゴリ押し
--- orig/clients/uil/UilLexAna.c 2012-10-22 23:50:39.000000000 +0900
+++ motif-2.3.4/clients/uil/UilLexAna.c 2015-12-08 09:20:32.656978867 +0900
@@ -982,6 +982,23 @@
RIGHT_SHIFT };
+
+#ifdef OREORE_QUICKIE_FIX
+#warning OREORE_QUICKIE_FIX defined!!!
+#endif

+#ifdef OREORE_QUICKIE_FIX
+ /* Improvised UTF-8 lead-byte check. OREORE_EQUAL(X, Y) is true when every bit of mask Y is set in X; OREORE_GET_UTF8_BYTES(c) sums those 0/1 results for the masks 0xc0, 0xe0, 0xf0, 0xf8 and 0xfc, i.e. it counts how many of these masks are fully set in c. NOTE(review): OREORE_EQUAL does not parenthesize X and Y in its expansion and OREORE_GET_UTF8_BYTES expands c several times, so pass only side-effect-free arguments. */
+ #define OREORE_EQUAL(X, Y) ((X & Y) == Y)
+ #define OREORE_GET_UTF8_BYTES(c) \
+ (OREORE_EQUAL((c), 0xc0) + \
+ OREORE_EQUAL((c), 0xe0) + \
+ OREORE_EQUAL((c), 0xf0) + \
+ OREORE_EQUAL((c), 0xf8) + \
+ OREORE_EQUAL((c), 0xfc))
+#endif
+
+
/*
**++
** FUNCTIONAL DESCRIPTION:
@@ -1034,6 +1051,11 @@
*az_start_src_record;/* starting source record of a token */
lex_buffer_type
*az_current_lex_buffer; /* current lexical buffer */
+#ifdef OREORE_QUICKIE_FIX
+ int l_oreore_str_remain;
+ unsigned char c_oreore_prev_char;
+ unsigned char c_oreore_cur_char;
+#endif
az_charset_entry = (sym_value_entry_type *) 0;
@@ -1062,24 +1084,56 @@
l_charset = lex_k_default_charset;
l_16bit_chars_only = FALSE;
l_state = state_initial;
-
+#ifdef OREORE_QUICKIE_FIX
+ l_oreore_str_remain = 0;
+ c_oreore_prev_char = 0;
+ c_oreore_cur_char = 0;
+#endif
/* start looking for the token */
continue_in_next_state:
for (;;)
{
+#ifdef OREORE_QUICKIE_FIX
+ c_oreore_prev_char = c_oreore_cur_char;
+#endif
/* get next input char */
/* advance source too */
c_char = src_az_current_source_buffer->c_text
[ src_az_current_source_buffer->w_current_position++ ];
-
+
+#ifdef OREORE_QUICKIE_FIX
+ c_oreore_cur_char = c_char;
+#endif
/* %COMPLETE */
Uil_characters_read++;
l_class = class_table[ c_char ]; /* determine its class */
z_cell = token_table[ l_state][l_class ]; /* load state cell */
+#ifdef OREORE_QUICKIE_FIX
+ /* While UTF-8 continuation bytes are still pending, only the state check is performed */
+ if (l_oreore_str_remain > 0) {
+ if (class_illegal == l_class) {
+ l_class = class_name;
+ z_cell = token_table[ l_state][l_class ];
+ }
+ l_oreore_str_remain--;
+ }
+ /* Run the UTF-8 detection when the class is class_illegal and the cell's backup is control_char */
+ if ((class_illegal == l_class) &&
+ (control_char == z_cell.backup))
+ {
+ if (c_oreore_prev_char > 0x0A || c_oreore_prev_char > 0x0a) {
+ /* Get the length from the lead byte (it comes out as zero if the lead byte was skipped by earlier processing) */
+ l_oreore_str_remain = OREORE_GET_UTF8_BYTES(c_oreore_prev_char >=0x80 ? c_oreore_prev_char : c_char) +1;
+ /* Forcibly overwrite the state with a valid value */
+ l_class = class_name;
+ z_cell = token_table[ l_state][l_class ];
+ }
+ }
+#endif
/* pick up the next state, or terminal, or error */
l_state = z_cell.next_state;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment