Last active
September 8, 2019 20:33
-
-
Save pinkeen/efcc81232e4c2d7c53837e97b68cc677 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
/** | |
* Simple string hash with minimal memory requirements. | |
* | |
* This is meant for uCs with 128-512 byte SRAM - | |
* the purpose is to avoid having to store string | |
* identifier in RAM when parsing some incoming data. | |
* | |
* A hash can be computed as the string is consumed and | |
* then only the 16bit value needs to be compared instead | |
* of the buffering whole string. | |
* | |
* Of course this should consume char stream instead of | |
* iterating over buffered string. | |
* | |
* Collisions should not be an issue with small indentifier | |
* space to cover - maybe the hash size can be even decreased | |
* to 8bit? | |
* | |
* This is not crypto-safe! In fact the exact opposite π | |
**/ | |
/* Use a simple xorshift hash function | |
* instead of storing the full mixing table. */ | |
uint8_t T(uint8_t i) { | |
i ^= i >> 2; | |
i ^= i << 1; | |
return i; | |
} | |
/* Simple pearson hash algo. */ | |
uint16_t H(char *c) { | |
uint8_t h[2]; | |
while (*c != '\0') { | |
uint8_t *hb = h + ((size_t)c & 1); | |
*hb = T(*hb ^ (uint8_t)*c); | |
++c; | |
} | |
return h[0] + (h[1] << 8); | |
} | |
void print_str_hash(char *str) { | |
uint16_t HV = H(str); | |
printf("%05d [0x%04x] = H(\"%s\")\n", HV, HV, str); | |
} | |
int main() { | |
/* On a single core of i7-8700 the speed is roughly | |
* 27M iterations / second for the word "identifier" | |
* which translates to 270M chars/sec. */ | |
for (unsigned int i = 0; i < 0xFFFFFFF; ++i) { | |
H("identifier"); | |
} | |
print_str_hash("identifier"); | |
print_str_hash("identifierB"); | |
print_str_hash("identif1er"); | |
print_str_hash("/identif1er"); | |
print_str_hash("a.very.long.path.to[1].something"); | |
print_str_hash("value"); | |
print_str_hash("a_value_a"); | |
print_str_hash("a_value_b"); | |
print_str_hash("a_value_c"); | |
print_str_hash("a_value_d"); | |
print_str_hash("a_value_e"); | |
print_str_hash("a_value_f"); | |
print_str_hash("a_v4lue_f"); | |
print_str_hash("a_valu3_f"); | |
print_str_hash("a_valu3_f4324234"); | |
print_str_hash("name"); | |
print_str_hash("what to you mean ___ ??? !!ΕΔ_.fewqrwe%$#@$#@$!(_@)#_)!8FJLDK:FJLDS:34234234!"); | |
print_str_hash("aaaa"); | |
print_str_hash("zzbb"); | |
print_str_hash("00cc"); | |
print_str_hash("__zz"); | |
print_str_hash("abab"); | |
print_str_hash("abba"); | |
/* No reason why UTF8 would not work π§ */ | |
print_str_hash("ππ πππ "); | |
} | |
/** Output ** | |
59378 [0xe7f2] = H("identifier") | |
62100 [0xf294] = H("identifierB") | |
55527 [0xd8e7] = H("identif1er") | |
33255 [0x81e7] = H("/identif1er") | |
18000 [0x4650] = H("a.very.long.path.to[1].something") | |
17384 [0x43e8] = H("value") | |
56218 [0xdb9a] = H("a_value_a") | |
56223 [0xdb9f] = H("a_value_b") | |
56220 [0xdb9c] = H("a_value_c") | |
56214 [0xdb96] = H("a_value_d") | |
56213 [0xdb95] = H("a_value_e") | |
56208 [0xdb90] = H("a_value_f") | |
59280 [0xe790] = H("a_v4lue_f") | |
56204 [0xdb8c] = H("a_valu3_f") | |
13117 [0x333d] = H("a_valu3_f4324234") | |
21119 [0x527f] = H("name") | |
54759 [0xd5e7] = H("what to you mean ___ ??? !!ΕΔ_.fewqrwe%$#@$#@$!(_@)#_)!8FJLDK:FJLDS:34234234!") | |
28784 [0x7070] = H("aaaa") | |
01799 [0x0707] = H("zzbb") | |
29298 [0x7272] = H("00cc") | |
40606 [0x9e9e] = H("__zz") | |
31088 [0x7970] = H("abab") | |
30076 [0x757c] = H("abba") | |
40919 [0x9fd7] = H("ππ πππ ") | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment