Skip to content

Instantly share code, notes, and snippets.

@EmilHernvall
Last active February 24, 2021 06:24
Show Gist options
  • Save EmilHernvall/953733 to your computer and use it in GitHub Desktop.
Save EmilHernvall/953733 to your computer and use it in GitHub Desktop.
Simple base64-encoder for java
/**
 * Minimal Base64 encoder/decoder for the standard alphabet
 * ({@code A-Z a-z 0-9 + /}) with {@code =} padding.
 *
 * <p>Both methods are stateless; the lookup tables are built once when the
 * class is loaded and never modified afterwards.
 */
public class Base64
{
    /** Maps each 6-bit value (0-63) to its character in the Base64 alphabet. */
    private static final char[] ENCODE_TABLE =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Maps a character code to its 6-bit value, or -1 when the character is
     * not part of the alphabet. Every alphabet character is ASCII, so 128
     * entries suffice; anything at or above that is rejected by a range check.
     */
    private static final int[] DECODE_TABLE = new int[128];

    static {
        for (int i = 0; i < DECODE_TABLE.length; i++) {
            DECODE_TABLE[i] = -1;
        }
        for (int i = 0; i < ENCODE_TABLE.length; i++) {
            DECODE_TABLE[ENCODE_TABLE[i]] = i;
        }
    }

    /**
     * Encodes {@code data} as a padded Base64 string.
     *
     * @param data the bytes to encode; must not be {@code null}
     * @return the Base64 text, whose length is always a multiple of four
     *         (the empty string for an empty array)
     * @throws NullPointerException if {@code data} is {@code null}
     */
    public static String encode(byte[] data)
    {
        // Four output characters per (possibly partial) group of three bytes.
        // Computed in long so a huge array cannot overflow into a negative capacity.
        long encodedLength = ((long) data.length + 2) / 3 * 4;
        StringBuilder buffer = new StringBuilder((int) Math.min(Integer.MAX_VALUE, encodedLength));

        for (int i = 0; i < data.length; i += 3) {
            int remaining = data.length - i;

            // Pack up to three bytes into the low 24 bits; absent bytes stay zero.
            int group = (data[i] & 0xFF) << 16;
            if (remaining > 1) {
                group |= (data[i + 1] & 0xFF) << 8;
            }
            if (remaining > 2) {
                group |= data[i + 2] & 0xFF;
            }

            // The first two sextets always carry data; the last two are
            // replaced by '=' when the final group is short.
            buffer.append(ENCODE_TABLE[(group >> 18) & 0x3F]);
            buffer.append(ENCODE_TABLE[(group >> 12) & 0x3F]);
            buffer.append(remaining > 1 ? ENCODE_TABLE[(group >> 6) & 0x3F] : '=');
            buffer.append(remaining > 2 ? ENCODE_TABLE[group & 0x3F] : '=');
        }
        return buffer.toString();
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>Characters outside the Base64 alphabet (whitespace, line breaks, a
     * byte-order mark, any non-ASCII character) are skipped wherever they
     * occur. A {@code '='} ends the current group, so several padded strings
     * concatenated together decode to the concatenation of their payloads.
     * A trailing group consisting of a single alphabet character carries
     * fewer than eight bits and is dropped.
     *
     * @param data the Base64 text; must not be {@code null}
     * @return the decoded bytes (an empty array if no complete byte is present)
     * @throws NullPointerException if {@code data} is {@code null}
     */
    public static byte[] decode(String data)
    {
        int length = data.length();
        // Every output byte consumes at least 4/3 input characters, so this is
        // an upper bound; long arithmetic avoids overflow for very long input.
        byte[] out = new byte[(int) ((long) length * 3 / 4)];
        int written = 0;
        int acc = 0;    // sextets of the current group, most significant first
        int count = 0;  // number of sextets currently held in acc

        for (int i = 0; i < length; i++) {
            char ch = data.charAt(i);
            if (ch == '=') {
                // Padding closes the group: emit whatever whole bytes it holds.
                written = flushPartial(acc, count, out, written);
                acc = 0;
                count = 0;
                continue;
            }
            int value = ch < DECODE_TABLE.length ? DECODE_TABLE[ch] : -1;
            if (value < 0) {
                continue; // skip unknown characters
            }
            acc = (acc << 6) | value;
            if (++count == 4) {
                out[written++] = (byte) (acc >> 16);
                out[written++] = (byte) (acc >> 8);
                out[written++] = (byte) acc;
                acc = 0;
                count = 0;
            }
        }
        // Tolerate input whose final group was left unpadded.
        written = flushPartial(acc, count, out, written);

        if (written == out.length) {
            return out;
        }
        byte[] trimmed = new byte[written];
        System.arraycopy(out, 0, trimmed, 0, written);
        return trimmed;
    }

    /**
     * Writes the whole bytes contained in an incomplete group of two or three
     * sextets to {@code out} and returns the new write position; groups of
     * zero or one sextet contain no whole byte and write nothing.
     */
    private static int flushPartial(int acc, int count, byte[] out, int written)
    {
        if (count == 2) {
            // 12 bits held: the top 8 form one byte.
            out[written++] = (byte) (acc >> 4);
        } else if (count == 3) {
            // 18 bits held: the top 16 form two bytes.
            out[written++] = (byte) (acc >> 10);
            out[written++] = (byte) (acc >> 2);
        }
        return written;
    }
}
import java.io.ByteArrayOutputStream;
import java.util.Random;
import java.util.Arrays;
import javax.xml.bind.DatatypeConverter;
/**
 * Randomised checks for {@link Base64}: a self round-trip, two round-trips
 * that cross over to {@code DatatypeConverter}, and a direct comparison of
 * the encoded text produced by both implementations.
 *
 * <p>Nothing is asserted; every mismatch is printed to standard output, so a
 * clean run prints only the four section headings.
 */
public class Base64Test
{
    /**
     * Prints {@code bytes} as space-separated two-digit uppercase hex values
     * followed by a line break.
     *
     * @param bytes the bytes to print
     */
    public static void print(byte[] bytes)
    {
        for (byte b : bytes) {
            System.out.printf("%02X ", b);
        }
        System.out.println();
    }

    /**
     * Returns between 1 and 100 random bytes.
     *
     * <p>{@code Random.nextBytes} is used so that every byte value, including
     * 0xFF, can occur; an exclusive bound of 0xFF on {@code nextInt} would
     * never produce it.
     */
    private static byte[] randomBytes(Random rand)
    {
        byte[] bytes = new byte[rand.nextInt(100) + 1];
        rand.nextBytes(bytes);
        return bytes;
    }

    /** Prints both arrays and a blank line if the round-tripped bytes differ from the input. */
    private static void reportIfDifferent(byte[] original, byte[] decoded)
    {
        if (!Arrays.equals(original, decoded)) {
            print(original);
            print(decoded);
            System.out.println();
        }
    }

    /**
     * Runs the four test phases in sequence.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        int steps = 1000000;
        Random rand = new Random(System.currentTimeMillis());

        System.out.println("Encode, decode");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = Base64.encode(original);
            reportIfDifferent(original, Base64.decode(encoded));
        }

        System.out.println("Encode, decode with other");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = Base64.encode(original);
            reportIfDifferent(original, DatatypeConverter.parseBase64Binary(encoded));
        }

        System.out.println("Encode with other, decode");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = DatatypeConverter.printBase64Binary(original);
            reportIfDifferent(original, Base64.decode(encoded));
        }

        System.out.println("Comparison test");
        for (int count = 0; count < 100000; count++) {
            byte[] original = randomBytes(rand);
            String encoded = DatatypeConverter.printBase64Binary(original);
            String encoded2 = Base64.encode(original);
            if (!encoded.equals(encoded2)) {
                System.out.println("mismatch");
                System.out.println(encoded);
                System.out.println(encoded2);
                System.out.println();
            }
        }
    }
}
# Script to generate the reverse lookup table used by the Java decoder.
#
# Prints the number of entries, then a comma-separated list with one entry
# per character code 0..255: the 6-bit Base64 value for characters in the
# alphabet, -1 for everything else. Entries are right-aligned to width 3 so
# the output can be pasted straight into an array initialiser.
s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
res = [(ord(c), i) for i, c in enumerate(s)]
lookup = dict(res)
f = []
# 256 entries are required so that character code 255 also has a slot;
# range(0, 255) would stop one short and leave the table with 255 entries.
for i in range(0, 256):
    if i in lookup:
        n = str(lookup[i])
        if len(n) == 1:
            n = " " + n
        f.append(" " + n)
    else:
        f.append(" " + str(-1))
print(len(f))
print(",".join(f))
@andrewchambers
Copy link

There is still a bug in it, try and decode this string:
AAAAAAAAAAABAg==
python says:
base64.b64decode("AAAAAAAAAAABAg==")
'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02'
this one fails.

@EmilHernvall
Copy link
Author

@andrewchambers: Confirmed. :/ Seems to be more subtleties to this than I imagined. I've updated the code to work with your example.

@EmilHernvall
Copy link
Author

I've added a test which generates random bytes and uses the built-in java6 methods for comparison, which hopefully rules out further bugs. This whole thing is obviously of much less utility now that java ships with base64 support, but it might be of some use for legacy code.

@kimwooglae
Copy link

kimwooglae commented Jan 18, 2018

There are some bugs in decode method.

  1. The length of the tbl array is 255; add one more entry so that it has 256.
  2. A Java byte is a signed value (-128 to 127). When an invalid character is fed in, an exception occurs. Some text editors add a BOM character, which can cause this problem.
for (int i = 0; i < bytes.length;) {
   int b = 0;
   if (bytes[i] >= 0 && tbl[bytes[i]] != -1) {
      b = (tbl[bytes[i]] & 0xFF) << 18;
   }

@hrules6872
Copy link

@ZhouGongZaiShi
Copy link

ZhouGongZaiShi commented Aug 29, 2019

There are some bugs in decode method.

  1. The length of the tbl array is 255; add one more entry so that it has 256.
  2. A Java byte is a signed value (-128 to 127). When an invalid character is fed in, an exception occurs. Some text editors add a BOM character, which can cause this problem.
for (int i = 0; i < bytes.length;) {
   int b = 0;
   if (bytes[i] >= 0 && tbl[bytes[i]] != -1) {
      b = (tbl[bytes[i]] & 0xFF) << 18;
   }

@kimwooglae
May I ask you a question?
Why should the length of the tbl array be 256 instead of 255?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment