Skip to content

Instantly share code, notes, and snippets.

@w-vi
Created June 25, 2014 12:39
Show Gist options
  • Save w-vi/e87b81caccf30da8a333 to your computer and use it in GitHub Desktop.
Save w-vi/e87b81caccf30da8a333 to your computer and use it in GitHub Desktop.
Base62 encoding
// based on: https://github.com/thehunmonkgroup/node-base62-c/blob/master/base62.cc
static const char base62_vals[] = "0123456789"
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
static const int base62_index[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0, 0,
0, 0, 0, 0, 0, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,
0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0, 0, 0, 0, 0,
0, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23,
};
static void
gclu_base62_reverse_inplace(gcln_buf_t *buf)
{
ASSERT(buf);
char *base = buf->base;
const size_t len = buf->len;
const size_t half = len >> 1;
char c;
for (size_t i = 0; i < half; i++)
{
c = base[i];
base[i] = base[len - i - 1];
base[len - i - 1] = c;
}
}
gcln_status_t
gclu_base62_encode(uint32_t val, gcln_buf_t *dst, gcln_buf_t *dst_slice_opt)
{
ASSERT(dst && dst->base && dst->len > 0);
char *base = dst->base;
const int len = (int)dst->len;
int i = 0;
int v;
do
{
if (i + 1 >= len)
{
return GCLN_ERROR;
}
v = val % 62;
base[i++] = base62_vals[v];
val = (val - v) / 62;
} while (val > 0);
base[i] = '\0';
gcln_buf_t buf = { base, i };
gclu_base62_reverse_inplace(&buf);
if (dst_slice_opt)
{
*dst_slice_opt = buf;
}
return GCLN_OK;
}
gcln_status_t
gclu_base62_decode(gcln_buf_t src, uint32_t *val)
{
ASSERT(src.base && src.len > 0 && val);
uint32_t ret = 0;
const char *base = src.base;
const int len = (int)src.len;
int i;
char c;
for (i = 0; i < len; i++)
{
c = base[i];
if (!isalnum(c))
{
return GCLN_ERROR;
}
ret += base62_index[(int)c] * pow(62, len - i - 1);
}
*val = ret;
return GCLN_OK;
}
gcln_status_t
gclu_base62_test()
{
gcln_bool_t ok = TRUE;
char data[16];
gcln_buf_t dst = { data, sizeof(data) };
gcln_buf_t dst_slice = { 0, 0 };
{
const uint32_t val_in = 123456789;
uint32_t val_out = 0;
ok &= GCLN_IS_OK(gclu_base62_encode(val_in, &dst, &dst_slice));
ASSERT(ok);
ok &= strcmp(dst_slice.base, "8m0Kx") == 0;
ASSERT(ok);
ok &= GCLN_IS_OK(gclu_base62_decode(dst_slice, &val_out));
ASSERT(ok);
ok &= val_in == val_out;
ASSERT(ok);
}
for (uint32_t i = 0; i < 100; ++i)
{
const uint32_t val_in = 999 + i * 111;
uint32_t val_out = 0;
ok &= GCLN_IS_OK(gclu_base62_encode(val_in, &dst, &dst_slice));
ASSERT(ok);
ok &= GCLN_IS_OK(gclu_base62_decode(dst_slice, &val_out));
ASSERT(ok);
ok &= val_in == val_out;
ASSERT(ok);
}
return ok;
}
#tuple is faster that string ... so thats why tuple
BASE_ALPH = tuple("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
BASE_DICT = dict((c, v) for v, c in enumerate(BASE_ALPH))
BASE_LEN = len(BASE_ALPH)
def base_decode(string):
num = 0
for char in string:
num = num * BASE_LEN + BASE_DICT[char]
return num
def base_encode(num):
if not num:
return BASE_ALPH[0]
encoding = ""
while num:
num, rem = divmod(num, BASE_LEN)
encoding = BASE_ALPH[rem] + encoding
return encoding
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment