Skip to content

Instantly share code, notes, and snippets.

@samcv
Created January 23, 2017 07:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samcv/1989fe2c72f7aafa4c4df7b79c968926 to your computer and use it in GitHub Desktop.
Save samcv/1989fe2c72f7aafa4c4df7b79c968926 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdint.h>
#include <string.h>
/* NOTE(review): this macro expands to nothing, so the loop bound
 * `uninames + uninames_elems + 50` in main() parses as `uninames + (+50)`.
 * Presumably it was meant to carry the element count of the uninames
 * table -- TODO confirm before giving it a value, because main() adds 50
 * on top of it. */
#define uninames_elems
/*
 * Synthesize a name for a code point in the range U+0000..U+001F.
 *
 * For such a code point the string "<control-XXXX>" (XXXX = four upper-case
 * hex digits) is written to `out`, which must have room for at least
 * 15 bytes including the terminating NUL.
 *
 * Returns `out` when a name was written, or NULL (leaving `out` untouched)
 * for any other code point.
 */
char * get_uninames ( char * out, uint32_t cp ) {
    /* cp is unsigned, so only the upper bound needs checking. */
    if (cp > 31) {
        return NULL;
    }
    /* %X expects an unsigned int; cast explicitly so the argument type
     * matches the conversion on every platform. */
    sprintf(out, "<control-%.4X>", (unsigned int) cp);
    return out;
}
/* Single-character table indexed by a code in 0..39.
 * Entry 0 is the NUL that eat_a_string() treats as end-of-string.
 * Entry 39 is never emitted by eat_a_string(), which intercepts code 39
 * before indexing this table. */
char ctable[40] = {
    /*  0      */ '\0',
    /*  1 - 13 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    /* 14 - 26 */ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    /* 27 - 36 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* 37 - 39 */ ' ', '-', '\a'
};
/* Whole-word table: eat_a_string() appends s_table[i] to the output when
 * it sees code 39 followed by code i. */
char * s_table[40] = {
    /*  0 */ "CAPITAL",     "LETTER",     "LATIN",      "DIGIT",
    /*  4 */ "PARENTHESIS", "SIGN",       "SMALL",      "BRACKET",
    /*  8 */ "SOLIDUS",     "EXCLAMATION","COMMERCIAL", "SQUARE",
    /* 12 */ "ACCENT",      "CIRCUMFLEX", "APOSTROPHE", "QUOTATION",
    /* 16 */ "AMPERSAND",   "SEMICOLON",  "MARK",       "QUESTION",
    /* 20 */ "ASTERISK",    "RIGHT",      "PERCENT",    "GREATER",
    /* 24 */ "REVERSE",     "HYPHEN",     "EQUALS",     "NUMBER",
    /* 28 */ "DOLLAR",      "LEFT",       "THAN",       "COLON",
    /* 32 */ "COMMA",       "SEVEN",      "EIGHT",      "SPACE",
    /* 36 */ "MINUS",       "GRAVE",      "THREE",      "LESS"
};
/* Packed name data, read sequentially through Decompressor.input_position.
 * digest_one_chunk() splits each 16-bit value into three base-40 codes:
 * value / 1600, (value % 1600) / 40 and value % 40. eat_a_string() then
 * interprets those codes: 0 ends the current string, 39 followed by a code i
 * appends s_table[i], and any other code c appends ctable[c]. */
const static uint16_t uninames[201] = {
0,0,0,0,0,0,0,0,0,0,39,56039,15919,28839,25519,28839,44719,8039,46319,8039,
36719,8039,25639,22439,47919,6439,35119,6439,32016,20059,60765,1592,1585,62396,
261,19717,31215,25639,12839,6306,8735,1563,59814,8039,6300,37400,62557,63920,
62557,10221,28839,6286,15285,1563,59969,38439,6319,52839,6319,54439,6294,14965,
1591,1577,1599,62390,60765,1586,60765,1583,62390,60765,1579,60778,1570,59260,
1562,60760,60761,59240,62517,62437,62477,3239,4719,1519,3083,1562,60760,60761,
59360,62517,62437,62477,8039,4719,1519,3086,1562,60760,60761,59480,62517,62437,
62477,12839,4719,1519,3089,1562,60760,60761,59600,62517,62437,62477,17639,4719,
1519,3092,1562,60760,60761,59720,62517,62437,62477,22439,4719,1519,3095,1562,
60760,60761,59840,62517,62437,62477,27239,4719,1519,3098,1562,60760,60761,59960,
62517,62437,62477,32039,4719,1519,3101,1562,60760,60761,60080,62517,62437,62477,
36839,4719,1519,3104,1562,60760,60761,60200,62517,62437,62477,41639,47919,19119,
11239,39919,12839,35119,19119,11239,22319,19212,24957,19574,8039,60719,19239,
4719,11119,3081,1562,60766,60761,59280,62517,62677,62477,4839,4719,11119,
3084};
/* Capacity of Decompressor.out_buf in characters, not counting the
 * terminating NUL. Presumably the length of the longest name encoded in
 * the table -- TODO confirm against the generator. */
#define LONGEST_NAME 22
/* Streaming state for decoding the packed name table one string at a time. */
typedef struct Decompressor {
/* Decoding one input value yields three codes, each either a character or
 * part of a two-code word reference. They are held here, oldest first,
 * until eat_a_string() consumes them. */
int16_t queue[6];
/* How many valid entries are currently in the queue? */
uint16_t queue_len;
/* Next packed value to read; digest_one_chunk() advances it by one per call. */
const unsigned short * input_position;
/* Set by eat_a_string() once it consumes a 0 code, i.e. the current string
 * has ended and the next call continues with the next one. */
uint8_t eos_signalled;
/* Offset in out_buf where the next character goes; reset to 0 when a
 * string ends. */
uint8_t out_buf_pos;
/* Decoded characters are placed here; the end-of-string code writes the
 * terminating NUL. */
char out_buf[LONGEST_NAME + 1];
} Decompressor;
/*
 * Read the next packed 16-bit value from ds->input_position, advance the
 * input by one element, and append its three base-40 digits (most
 * significant first) to ds->queue.
 *
 * The caller must make sure at least three free queue slots are available.
 */
void digest_one_chunk(Decompressor *ds) {
    const uint16_t packed = *ds->input_position;
    ds->input_position++;

    /* 1600 == 40 * 40, so these are the three base-40 digits of `packed`. */
    ds->queue[ds->queue_len]     = packed / 1600;
    ds->queue[ds->queue_len + 1] = (packed / 40) % 40;
    ds->queue[ds->queue_len + 2] = packed % 40;
    ds->queue_len += 3;
}
/*
 * Decode the next string from the packed input into ds->out_buf.
 *
 * Codes are pulled from ds->queue (refilled via digest_one_chunk() as
 * needed) until an end-of-string code (0) is consumed:
 *   - code 39 followed by code i appends the word s_table[i];
 *   - code 0 writes the terminating NUL, sets ds->eos_signalled and resets
 *     ds->out_buf_pos to 0;
 *   - any other code c appends the single character ctable[c].
 *
 * Output is clamped to LONGEST_NAME characters plus the NUL, and codes that
 * would index past the 40-entry tables are dropped, so malformed input
 * cannot write or read out of bounds. For names that fit, the resulting
 * string is the same as before; the only difference is that the unused tail
 * of out_buf is no longer zero-filled as strncpy() used to do.
 *
 * The function has no way to detect the end of the input table; the caller
 * is responsible for not calling it past the last encoded string.
 */
void eat_a_string( Decompressor *ds ) {
    /* ctable and s_table both hold 40 entries; code 39 selects s_table. */
    enum { TABLE_SIZE = 40, SHIFT_CODE = 39 };
    const size_t queue_bytes = sizeof ds->queue;
    const size_t slot_bytes = sizeof ds->queue[0];

    ds->eos_signalled = 0;
    while (!ds->eos_signalled) {
        if (ds->queue_len == 0) { digest_one_chunk(ds); }

        if (ds->queue[0] == SHIFT_CODE) {
            /* The word index is the code right after the shift marker; pull
             * in another chunk if the marker was the last code queued. */
            if (ds->queue_len == 1) { digest_one_chunk(ds); }

            const int16_t word_idx = ds->queue[1];
            if (word_idx >= 0 && word_idx < TABLE_SIZE
                    && ds->out_buf_pos < LONGEST_NAME) {
                const size_t room = (size_t)(LONGEST_NAME - ds->out_buf_pos);
                size_t len = strlen(s_table[word_idx]);
                if (len > room) {
                    len = room; /* truncate instead of overrunning out_buf */
                }
                memcpy(ds->out_buf + ds->out_buf_pos, s_table[word_idx], len);
                /* Advance only by what was actually copied so out_buf_pos
                 * never exceeds LONGEST_NAME. */
                ds->out_buf_pos += (uint8_t)len;
            }

            /* Let the two codes flow out of the queue. */
            memmove(ds->queue, ds->queue + 2, queue_bytes - 2 * slot_bytes);
            ds->queue_len -= 2;
        }
        else {
            const int16_t code = ds->queue[0];
            if (code == 0) {
                /* End of string: terminate (ctable[0] is NUL) and rewind the
                 * write position for the next string. */
                const size_t end = ds->out_buf_pos < LONGEST_NAME
                                 ? ds->out_buf_pos : LONGEST_NAME;
                ds->out_buf[end] = '\0';
                ds->eos_signalled = 1;
                ds->out_buf_pos = 0;
            }
            else if (code > 0 && code < TABLE_SIZE
                    && ds->out_buf_pos < LONGEST_NAME) {
                ds->out_buf[ds->out_buf_pos++] = ctable[code];
            }

            memmove(ds->queue, ds->queue + 1, queue_bytes - slot_bytes);
            ds->queue_len--;
        }
    }
}
/*
 * Decode strings from the start of the uninames table and print one
 * "U+<hex> '<name>'" line per decoded string, numbering them from 0.
 * When a decoded name is empty, get_uninames() is given the chance to
 * fill in a synthesized name instead.
 */
int main (void) {
    int32_t cp = 0;
    /* `{0}` rather than `{}`: empty braces are only valid from C23 on. */
    Decompressor ds = {0};
    ds.input_position = (const unsigned short *) uninames;
    /* NOTE(review): uninames_elems is defined with an empty body, so this
     * bound is effectively `uninames + 50`. Do not give the macro a value
     * without also revisiting the `+ 50`, or the loop will read past the
     * end of the table. */
    while (ds.input_position < uninames + uninames_elems + 50) {
        eat_a_string(&ds);
        if (ds.out_buf[0] == '\0') {
            get_uninames(ds.out_buf, cp);
        }
        /* %X requires an unsigned int argument. */
        printf("U+%X '%s'\n", (unsigned int) cp, ds.out_buf);
        cp++;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment