Instantly share code, notes, and snippets.

@cls /utf8base64.c
Last active Aug 7, 2017

Embed
What would you like to do?
Print non-ASCII UTF-8 sequences' unique Base64 identifiers
#include <stdio.h>
/*
 * clo[x] is the number of leading one bits in the 6-bit value x
 * (bit 5 is the most significant bit).  Entries 0..31 have bit 5 clear
 * and are left to the implicit zero initialisation.
 */
const char clo[64] = {
    [32] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10xxxx */
    [48] = 2, 2, 2, 2, 2, 2, 2, 2,                         /* 110xxx */
    [56] = 3, 3, 3, 3,                                     /* 1110xx */
    [60] = 4, 4,                                           /* 11110x */
    [62] = 5,                                              /* 111110 */
    [63] = 6,                                              /* 111111 */
};
/*
 * Base64 alphabet: b64[v] is the character for the 6-bit value v.
 * The literal is exactly 64 characters long, so it fills the array
 * completely and no terminating NUL is stored; this is a lookup table,
 * not a C string.
 */
const char b64[64] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
/*
 * Scan the NUL-terminated byte string s and, for every multi-byte
 * sequence found, write one Base64 alphabet character per byte of the
 * sequence to stdout (the character selected by the byte's low six bits).
 *
 * A byte >= 0300 (0xC0) is treated as a lead byte.  The sequence length
 * is the number of leading one bits of that byte, computed as
 * clo[low six bits] + 2 because the top two bits are already known to be
 * set.  Every other byte is skipped without producing output.
 *
 * The input is not validated as UTF-8.  A sequence that is cut short by
 * the terminating NUL is emitted only as far as the bytes actually
 * present, so the scan never reads past the end of the string.
 */
void
utf8base64(unsigned char *s)
{
    while (*s) {
        if (*s >= 0300) {
            int n = clo[*s % 64] + 2;

            /*
             * Also stop at the terminator: without the *s check a
             * truncated sequence would advance s beyond the NUL and
             * read out of bounds.
             */
            for (int i = 0; i < n && *s; i++) {
                putchar(b64[*s++ % 64]);
            }
        } else {
            s++;
        }
    }
}
/*
 * Run utf8base64() over each command-line argument in order and end each
 * argument's output with a newline.  Always returns 0.
 */
int
main(int argc, char **argv)
{
    for (int i = 1; i < argc; i++) {
        /*
         * argv elements are char *, while utf8base64() takes
         * unsigned char *; the two pointer types are not compatible, so
         * the conversion has to be written out explicitly.
         */
        utf8base64((unsigned char *)argv[i]);
        putchar('\n');
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment