Created
June 25, 2014 12:39
-
-
Save w-vi/e87b81caccf30da8a333 to your computer and use it in GitHub Desktop.
Base62 encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on: https://github.com/thehunmonkgroup/node-base62-c/blob/master/base62.cc | |
static const char base62_vals[] = "0123456789" | |
"abcdefghijklmnopqrstuvwxyz" | |
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |
static const int base62_index[] = { | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0, 0, | |
0, 0, 0, 0, 0, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, | |
0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, | |
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0, 0, 0, 0, 0, | |
0, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, | |
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, | |
0x21, 0x22, 0x23, | |
}; | |
static void | |
gclu_base62_reverse_inplace(gcln_buf_t *buf) | |
{ | |
ASSERT(buf); | |
char *base = buf->base; | |
const size_t len = buf->len; | |
const size_t half = len >> 1; | |
char c; | |
for (size_t i = 0; i < half; i++) | |
{ | |
c = base[i]; | |
base[i] = base[len - i - 1]; | |
base[len - i - 1] = c; | |
} | |
} | |
gcln_status_t | |
gclu_base62_encode(uint32_t val, gcln_buf_t *dst, gcln_buf_t *dst_slice_opt) | |
{ | |
ASSERT(dst && dst->base && dst->len > 0); | |
char *base = dst->base; | |
const int len = (int)dst->len; | |
int i = 0; | |
int v; | |
do | |
{ | |
if (i + 1 >= len) | |
{ | |
return GCLN_ERROR; | |
} | |
v = val % 62; | |
base[i++] = base62_vals[v]; | |
val = (val - v) / 62; | |
} while (val > 0); | |
base[i] = '\0'; | |
gcln_buf_t buf = { base, i }; | |
gclu_base62_reverse_inplace(&buf); | |
if (dst_slice_opt) | |
{ | |
*dst_slice_opt = buf; | |
} | |
return GCLN_OK; | |
} | |
gcln_status_t | |
gclu_base62_decode(gcln_buf_t src, uint32_t *val) | |
{ | |
ASSERT(src.base && src.len > 0 && val); | |
uint32_t ret = 0; | |
const char *base = src.base; | |
const int len = (int)src.len; | |
int i; | |
char c; | |
for (i = 0; i < len; i++) | |
{ | |
c = base[i]; | |
if (!isalnum(c)) | |
{ | |
return GCLN_ERROR; | |
} | |
ret += base62_index[(int)c] * pow(62, len - i - 1); | |
} | |
*val = ret; | |
return GCLN_OK; | |
} | |
gcln_status_t | |
gclu_base62_test() | |
{ | |
gcln_bool_t ok = TRUE; | |
char data[16]; | |
gcln_buf_t dst = { data, sizeof(data) }; | |
gcln_buf_t dst_slice = { 0, 0 }; | |
{ | |
const uint32_t val_in = 123456789; | |
uint32_t val_out = 0; | |
ok &= GCLN_IS_OK(gclu_base62_encode(val_in, &dst, &dst_slice)); | |
ASSERT(ok); | |
ok &= strcmp(dst_slice.base, "8m0Kx") == 0; | |
ASSERT(ok); | |
ok &= GCLN_IS_OK(gclu_base62_decode(dst_slice, &val_out)); | |
ASSERT(ok); | |
ok &= val_in == val_out; | |
ASSERT(ok); | |
} | |
for (uint32_t i = 0; i < 100; ++i) | |
{ | |
const uint32_t val_in = 999 + i * 111; | |
uint32_t val_out = 0; | |
ok &= GCLN_IS_OK(gclu_base62_encode(val_in, &dst, &dst_slice)); | |
ASSERT(ok); | |
ok &= GCLN_IS_OK(gclu_base62_decode(dst_slice, &val_out)); | |
ASSERT(ok); | |
ok &= val_in == val_out; | |
ASSERT(ok); | |
} | |
return ok; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#tuple is faster that string ... so thats why tuple | |
BASE_ALPH = tuple("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") | |
BASE_DICT = dict((c, v) for v, c in enumerate(BASE_ALPH)) | |
BASE_LEN = len(BASE_ALPH) | |
def base_decode(string): | |
num = 0 | |
for char in string: | |
num = num * BASE_LEN + BASE_DICT[char] | |
return num | |
def base_encode(num): | |
if not num: | |
return BASE_ALPH[0] | |
encoding = "" | |
while num: | |
num, rem = divmod(num, BASE_LEN) | |
encoding = BASE_ALPH[rem] + encoding | |
return encoding |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment