Created
September 11, 2017 00:00
-
-
Save MightyPork/aa6faaaca94349b3b87fca7b730d3c2c to your computer and use it in GitHub Desktop.
unicode cache for espterm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <stdbool.h> | |
#include <string.h> | |
// --- espterm stubs ---
// Stand-ins for names this code uses that are not defined in this file.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;

// Function attribute used on the cache API; expands to nothing here.
#define ICACHE_FLASH_ATTR

// printf-based loggers: a fixed tag, the formatted message, then a newline.
// `fmt` must be a string literal because it is concatenated with the tag.
#define dbg(fmt, ...) printf("[ ] "fmt"\n", ##__VA_ARGS__)
#define warn(fmt, ...) printf("[w] "fmt"\n", ##__VA_ARGS__)
#define error(fmt, ...) printf("[!] "fmt"\n", ##__VA_ARGS__)

// True when the first `n` bytes of `a` and `b` compare equal under strncmp
// rules (the comparison stops early at a NUL byte).
#define strneq(a, b, n) (strncmp((const char *)(a), (const char *)(b), (n)) == 0)
// ---------------------

/** Single-byte look-up reference handed out by the cache. */
typedef u8 UnicodeCacheRef;

/** Number of slots in the cache. */
#define UNICODE_CACHE_SIZE 160

/** One cache entry: up to 4 stored bytes and a use counter. */
typedef struct {
    u8 bytes[4];
    u16 count;
} UnicodeCacheSlot;

static UnicodeCacheSlot cache[UNICODE_CACHE_SIZE];

/*
 * Slot index <-> reference mapping. Reference values 32..126 are skipped:
 *
 *   slot  0..31   <->  ref   0..31
 *   slot 32..159  <->  ref 127..254
 *
 * REF_TO_ID is the exact inverse of ID_TO_REF. A reference of 255 yields
 * slot 160, which is outside the cache, so callers must range-check the
 * result before indexing `cache`.
 */
#define REF_TO_ID(c) ((u8)((c) >= 127 ? (c) - 95 : (c)))
#define ID_TO_REF(c) ((UnicodeCacheRef)((c) > 31 ? (c) + 95 : (c)))
/** | |
* Add a code point to the cache. ASCII is passed through. | |
* If the code point is already stored, it's use counter is incremented. | |
* | |
* @param bytes - utf8 bytes | |
* @return the obtained look-up reference | |
*/ | |
UnicodeCacheRef ICACHE_FLASH_ATTR | |
unicode_cache_add(const u8 *bytes) { | |
if (bytes[0] < 127) return bytes[0]; // ASCII, bypass | |
if (bytes[0] < 32) { | |
warn("utf8 cache illegal store '%c'", bytes[0]); | |
return '?'; | |
} | |
u8 slot; | |
for (slot = 0; slot < UNICODE_CACHE_SIZE; slot++) { | |
if (strneq(cache[slot].bytes, bytes, 4)) { | |
cache[slot].count++; | |
if (cache[slot].count == 1) { | |
dbg("utf8 cache resurrect '%.4s' @ %d", bytes, slot); | |
} else { | |
dbg("utf8 cache inc '%.4s' @ %d, %d uses", bytes, slot, cache[slot].count); | |
} | |
goto suc; | |
} | |
} | |
for (slot = 0; slot < UNICODE_CACHE_SIZE; slot++) { | |
if (cache[slot].count==0) { | |
// empty slot, store it | |
strncpy(cache[slot].bytes, bytes, 4); // this will zero out the remainder | |
cache[slot].count = 1; | |
dbg("utf8 cache new '%.4s' @ %d", bytes, slot); | |
goto suc; | |
} | |
} | |
error("utf8 cache full"); | |
return '?'; // fallback to normal ASCII that will show to the user | |
suc: | |
return ID_TO_REF(slot); | |
} | |
/** | |
* Look up a code point in the cache by reference. Do not change the use counter. | |
* | |
* @param ref - reference obtained earlier using unicode_cache_add() | |
* @param target - buffer of size 4 to hold the result. | |
* @return true if the look-up succeeded | |
*/ | |
bool ICACHE_FLASH_ATTR | |
unicode_cache_retrieve(UnicodeCacheRef ref, u8 *target) { | |
if (ref > 31 && ref < 127) { | |
// ASCII, bypass | |
target[0] = ref; | |
target[1] = 0; | |
return true; | |
} | |
u8 slot = REF_TO_ID(ref); | |
if (cache[slot].count == 0) { | |
// "use after free" | |
target[0] = '?'; | |
target[1] = 0; | |
error("utf8 cache use-after-free @ %d (freed)", slot); | |
return false; | |
} | |
dbg("utf8 cache hit '%.4s' @ %d, uses %d", cache[slot].bytes, slot, cache[slot].count); | |
strncpy(target, cache[slot].bytes, 4); | |
return true; | |
} | |
/** | |
* Remove an occurence of a code point from the cache. | |
* If the code point is used more than once, the use counter is decremented. | |
* | |
* @param ref - reference to remove or reduce | |
* @return true if the code point was found in the cache | |
*/ | |
bool ICACHE_FLASH_ATTR | |
unicode_cache_remove(UnicodeCacheRef ref) { | |
if (ref > 31 && ref < 127) return true; // ASCII, bypass | |
u8 slot = REF_TO_ID(ref); | |
if (cache[slot].count == 0) { | |
error("utf8 cache double-free @ %d", slot, cache[slot].count); | |
return false; | |
} | |
cache[slot].count--; | |
if (cache[slot].count) { | |
dbg("utf8 cache sub '%.4s' @ %d, %d uses remain", cache[slot].bytes, slot, cache[slot].count); | |
} else { | |
dbg("utf8 cache del '%.4s' @ %d", cache[slot].bytes, slot, cache[slot].count); | |
} | |
return true; | |
} | |
void main (void) | |
{ | |
u8 buf[4]; | |
u8 cc = unicode_cache_add("č\0\0"); | |
unicode_cache_add("č\0\0"); | |
u8 rc = unicode_cache_add("ř\0\0"); | |
unicode_cache_add("ř\0\0"); | |
unicode_cache_add("ř\0\0"); | |
unicode_cache_add("ř\0\0"); | |
u8 heart = unicode_cache_add("💙\0"); | |
unicode_cache_retrieve(cc, buf); | |
printf("%.4s\n", buf); | |
unicode_cache_retrieve(rc, buf); | |
printf("%.4s\n", buf); | |
unicode_cache_retrieve(heart, buf); | |
printf("%.4s\n", buf); | |
unicode_cache_remove(heart); | |
unicode_cache_remove(rc); | |
unicode_cache_remove(rc); | |
unicode_cache_remove(rc); | |
unicode_cache_remove(rc); | |
unicode_cache_add("💙\0"); | |
unicode_cache_add("A\0\0\0"); | |
unicode_cache_add("ñ\0\0"); | |
unicode_cache_remove(cc); | |
unicode_cache_remove(cc); | |
unicode_cache_add("ñ\0\0"); | |
unicode_cache_remove(heart); | |
unicode_cache_add("ñ\0\0"); | |
unicode_cache_add("±\0\0"); | |
unicode_cache_add("¯\0\0"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment