Created
January 23, 2017 07:36
-
-
Save samcv/1989fe2c72f7aafa4c4df7b79c968926 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#define uninames_elems | |
/*
 * Write a synthesized name for a code point that has no table entry.
 *
 * Only code points 0..31 are handled; they are rendered as
 * "<control-XXXX>" with four upper-case hex digits.
 *
 * out: destination buffer; must have room for at least 15 bytes
 *      (14 characters plus the terminating NUL).
 * cp:  the code point to name.
 *
 * Returns `out` when a name was written, or NULL (leaving `out` untouched)
 * when `cp` is outside the handled range.
 */
char *get_uninames(char *out, uint32_t cp) {
    /* cp is unsigned, so only the upper bound needs checking. */
    if (cp > 31) {
        return NULL;
    }
    /* %X expects an unsigned int; cast so the format matches on every ABI. */
    sprintf(out, "<control-%.4X>", (unsigned)cp);
    return out;
}
/*
 * Maps a decoded code (0..39) to the character it stands for.
 * Code 0 is NUL, which eat_a_string treats as the end of a name.
 * Code 39 is intercepted by eat_a_string as the prefix of an s_table word,
 * so the '\a' stored in that slot is never copied to the output there.
 */
char ctable[40] = {
    /* 0      */ '\0',
    /* 1..13  */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    /* 14..26 */ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    /* 27..36 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* 37..39 */ ' ', '-', '\a'
};
/*
 * Whole-word dictionary. eat_a_string copies the entry selected by the code
 * that follows a 39 marker into the output buffer.
 */
char *s_table[40] = {
    /*  0 */ "CAPITAL",     "LETTER",     "LATIN",      "DIGIT",      "PARENTHESIS",
    /*  5 */ "SIGN",        "SMALL",      "BRACKET",    "SOLIDUS",    "EXCLAMATION",
    /* 10 */ "COMMERCIAL",  "SQUARE",     "ACCENT",     "CIRCUMFLEX", "APOSTROPHE",
    /* 15 */ "QUOTATION",   "AMPERSAND",  "SEMICOLON",  "MARK",       "QUESTION",
    /* 20 */ "ASTERISK",    "RIGHT",      "PERCENT",    "GREATER",    "REVERSE",
    /* 25 */ "HYPHEN",      "EQUALS",     "NUMBER",     "DOLLAR",     "LEFT",
    /* 30 */ "THAN",        "COLON",      "COMMA",      "SEVEN",      "EIGHT",
    /* 35 */ "SPACE",       "MINUS",      "GRAVE",      "THREE",      "LESS"
};
/*
 * Packed name data. digest_one_chunk splits every 16-bit word into three
 * codes: word / 1600, (word % 1600) / 40 and word % 40.
 */
static const uint16_t uninames[201] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 56039, 15919, 28839, 25519, 28839, 44719, 8039, 46319, 8039,
    36719, 8039, 25639, 22439, 47919, 6439, 35119, 6439, 32016, 20059, 60765, 1592, 1585, 62396,
    261, 19717, 31215, 25639, 12839, 6306, 8735, 1563, 59814, 8039, 6300, 37400, 62557, 63920,
    62557, 10221, 28839, 6286, 15285, 1563, 59969, 38439, 6319, 52839, 6319, 54439, 6294, 14965,
    1591, 1577, 1599, 62390, 60765, 1586, 60765, 1583, 62390, 60765, 1579, 60778, 1570, 59260,
    1562, 60760, 60761, 59240, 62517, 62437, 62477, 3239, 4719, 1519, 3083, 1562, 60760, 60761,
    59360, 62517, 62437, 62477, 8039, 4719, 1519, 3086, 1562, 60760, 60761, 59480, 62517, 62437,
    62477, 12839, 4719, 1519, 3089, 1562, 60760, 60761, 59600, 62517, 62437, 62477, 17639, 4719,
    1519, 3092, 1562, 60760, 60761, 59720, 62517, 62437, 62477, 22439, 4719, 1519, 3095, 1562,
    60760, 60761, 59840, 62517, 62437, 62477, 27239, 4719, 1519, 3098, 1562, 60760, 60761, 59960,
    62517, 62437, 62477, 32039, 4719, 1519, 3101, 1562, 60760, 60761, 60080, 62517, 62437, 62477,
    36839, 4719, 1519, 3104, 1562, 60760, 60761, 60200, 62517, 62437, 62477, 41639, 47919, 19119,
    11239, 39919, 12839, 35119, 19119, 11239, 22319, 19212, 24957, 19574, 8039, 60719, 19239,
    4719, 11119, 3081, 1562, 60766, 60761, 59280, 62517, 62677, 62477, 4839, 4719, 11119,
    3084
};
/* Maximum number of name characters out_buf holds, excluding the NUL. */
#define LONGEST_NAME 22

/* Working state for decoding names out of a packed array of 16-bit words. */
typedef struct Decompressor {
    /* Pending codes. One input word yields three of them; each is either a
     * character code or a selector into a further shift level, and they
     * wait here until the decoder consumes them. */
    int16_t queue[6];
    /* Number of entries at the front of `queue` that are currently valid. */
    uint16_t queue_len;
    /* Next input word to be read. */
    const unsigned short *input_position;
    /* Set once the end of the current string has been seen, so decoding
     * stops and the next call starts on the following string. */
    uint8_t eos_signalled;
    /* Index in `out_buf` where the next character will be written. */
    uint8_t out_buf_pos;
    /* Decoded characters are collected here. */
    char out_buf[LONGEST_NAME + 1];
} Decompressor;
/*
 * Read the next 16-bit input word, advance the input position, and append
 * the word's three base-40 digits (most significant first) to the queue.
 *
 * No bounds are checked here: the caller must leave room for three more
 * queue entries and must make sure another input word is available.
 */
void digest_one_chunk(Decompressor *ds) {
    const uint16_t word = *ds->input_position;
    ds->input_position++;

    const uint32_t high = word / 1600;
    const uint32_t middle = (word % 1600) / 40;
    const uint32_t low = word % 40;

    ds->queue[ds->queue_len] = high;
    ds->queue[ds->queue_len + 1] = middle;
    ds->queue[ds->queue_len + 2] = low;
    ds->queue_len += 3;
}
/*
 * Decode one name into ds->out_buf.
 *
 * Codes are taken from the front of ds->queue, which is refilled from the
 * input with digest_one_chunk whenever it runs dry:
 *   - code 0 ends the name: out_buf is NUL-terminated, eos_signalled is set
 *     and out_buf_pos is reset to 0 for the next call;
 *   - code 39 is a shift marker: the code after it selects a whole word
 *     from s_table, which is appended to out_buf;
 *   - any other code is translated through ctable and appended.
 *
 * Output never grows past LONGEST_NAME characters; anything that does not
 * fit is dropped, so the terminating NUL always lands inside out_buf.
 * Codes that fall outside ctable / s_table are consumed without output.
 *
 * The input itself is not bounds-checked: the caller must make sure the
 * data contains an end-of-string code before the input array ends.
 */
void eat_a_string(Decompressor *ds) {
    enum { SHIFT_CODE = 39 };
    const size_t queue_cap = sizeof ds->queue / sizeof ds->queue[0];
    const size_t word_count = sizeof s_table / sizeof s_table[0];
    const size_t char_count = sizeof ctable / sizeof ctable[0];

    /* Defend against a caller-supplied position beyond the buffer. */
    if (ds->out_buf_pos > LONGEST_NAME) {
        ds->out_buf_pos = LONGEST_NAME;
    }

    ds->eos_signalled = 0;
    while (!ds->eos_signalled) {
        size_t consumed;

        if (ds->queue_len == 0) {
            digest_one_chunk(ds);
        }

        if (ds->queue[0] == SHIFT_CODE) {
            /* The word selector may still be sitting in the next input word. */
            if (ds->queue_len == 1) {
                digest_one_chunk(ds);
            }

            const int16_t selector = ds->queue[1];
            if (selector >= 0 && (size_t)selector < word_count) {
                const char *word = s_table[selector];
                const size_t room = (size_t)(LONGEST_NAME - ds->out_buf_pos);
                size_t len = strlen(word);

                /* Copy only what fits and advance by exactly that much, so
                 * out_buf_pos can never run past the buffer. */
                if (len > room) {
                    len = room;
                }
                memcpy(ds->out_buf + ds->out_buf_pos, word, len);
                ds->out_buf_pos = (uint8_t)(ds->out_buf_pos + len);
            }
            /* Both the marker and the selector leave the queue. */
            consumed = 2;
        } else {
            const int16_t code = ds->queue[0];

            if (code == 0) {
                /* out_buf_pos <= LONGEST_NAME, so this stays in bounds. */
                ds->out_buf[ds->out_buf_pos] = ctable[0];
                ds->eos_signalled = 1;
                ds->out_buf_pos = 0;
            } else if (code > 0 && (size_t)code < char_count
                       && ds->out_buf_pos < LONGEST_NAME) {
                ds->out_buf[ds->out_buf_pos++] = ctable[code];
            }
            consumed = 1;
        }

        /* Slide the remaining codes to the front of the queue. */
        memmove(ds->queue, ds->queue + consumed,
                (queue_cap - consumed) * sizeof ds->queue[0]);
        ds->queue_len = (uint16_t)(ds->queue_len - consumed);
    }
}
int main (void) { | |
int32_t cp = 0; | |
Decompressor ds = {}; | |
ds.input_position = (const unsigned short *) &uninames; | |
while (ds.input_position < uninames + uninames_elems + 50) { | |
eat_a_string(&ds); | |
if (ds.out_buf[0] == '\0') { | |
get_uninames(ds.out_buf, cp); | |
} | |
printf("U+%X '%s'\n", cp, ds.out_buf); | |
cp++; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment