This is part of my brainstorming for how to go from codepoint numbers to rows in the bitfield that store the property data for each codepoint. Due to there being hundreds of thounsands of codepoints, we cannot just store an array that stores hundreds of thousands of items.
Meaning of this table explained in the pseudocode below.
low | high | bitfield_row | miss |
---|---|---|---|
14 | 27 | 0 | 14 |
71 | 90 | 39 | 24 |
107 | 122 | 35 | 40 |
if cp >= low && cp <= high {
/* If it's between high and low, we have the result in the bitfield_row already */
return bitfield_row
}
else {
/* If it is not between low and high, but it is smaller than `low` in the next column
we can subtract `miss` and get the bitfield row for that codepoint */
return lookup[cp - miss]
}
Here is an example that uses linear search. If this data structure is used, it does not preclude the use of binary rather than linear search, or fastpaths for the most common characters.
/* used for misses, contains bitfield row numbers */
uint16_t lookup[] = {
0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,6,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,36,9,14,38,38,38,38,..}
struct table {
uint32_t low;
uint32_t high;
uint32_t bitfield_row;
uint32_t miss;
};
typedef struct table table;
table sorted_table[] = {
{14, 27, 0, 14},
{71, 90, 39, 24},
{107, 22, 35, 40}
};
#define sorted_table_elems ...
int get_offset_new (uint32_t cp) {
int i = 0;
for (;1;i++) {
if (cp < sorted_table[i+1].low) {
if (cp >= sorted_table[i].low && cp <= sorted_table[i].high) {
return sorted_table[i].bitfield_row;
}
/* Case to catch cp's less than the first point in the table
* eventually there should be an extra row at the beginning of the table for this? */
else if (cp < sorted_table[i].low) {
return point_index[cp];
}
else {
return point_index[cp - sorted_table[i].miss];
}
}
}
}