Created
February 2, 2016 23:58
-
-
Save fzakaria/95a714dcc8d06508524f to your computer and use it in GitHub Desktop.
A simple Base85 / Ascii85 codec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A small decoder for Ascii85 / Base85 text, closest in spirit to the Adobe variant.
 *
 * <p>Every group of five characters in the range {@code '!'}..{@code 'u'} represents one
 * big-endian 32-bit value, i.e. four bytes. The single character {@code 'z'} is shorthand for a
 * group of four zero bytes. A trailing group of two to four characters encodes one to three bytes.
 * The {@code <~} and {@code ~>} delimiters are not recognised by this class.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Ascii85">Ascii85</a>
 */
public final class Base85 {

    /** Number of characters in a full encoded group. */
    private static final int GROUP_CHARS = 5;

    /** Number of bytes produced by a full encoded group. */
    private static final int GROUP_BYTES = 4;

    /** Radix of the encoding. */
    private static final int RADIX = 85;

    /** Character that represents the digit 0 (ASCII 33). */
    private static final char FIRST_DIGIT = '!';

    /** Character that represents the digit 84 (ASCII 117). */
    private static final char LAST_DIGIT = 'u';

    /** Shorthand for a group of four zero bytes. */
    private static final char ZERO_GROUP = 'z';

    /** Largest value a five-character group may legally represent (2^32 - 1). */
    private static final long MAX_GROUP_VALUE = 0xFFFFFFFFL;

    /** Compiled once; whitespace may appear anywhere in the input to respect line limits. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private Base85() {
        // Static utility class; not instantiable.
    }

    /**
     * Decodes Base85 text into the bytes it represents.
     *
     * <p>Whitespace is removed before decoding. The {@code 'z'} shorthand is expanded to four zero
     * bytes and is only accepted at a group boundary. A short final group is padded with
     * {@code 'u'} before decoding and the padding bytes are dropped from the result.
     *
     * @param chars the Base85 encoded input; must not be {@code null} or empty
     * @return the decoded bytes (an empty array if the input contains only whitespace)
     * @throws IllegalArgumentException if {@code chars} is {@code null} or empty, contains a
     *     character outside {@code '!'}..{@code 'u'} other than {@code 'z'} and whitespace, has a
     *     {@code 'z'} inside a group, ends with a single dangling character, or contains a group
     *     whose value does not fit in 32 bits
     */
    static byte[] decode(String chars) {
        if (chars == null || chars.length() == 0) {
            throw new IllegalArgumentException("You must provide a non-zero length input");
        }
        String payload = WHITESPACE.matcher(chars).replaceAll("");

        // A 'z' yields four bytes from ONE character, so the output cannot be sized from the
        // character count alone: count the shorthand groups separately.
        int zeroGroups = 0;
        for (int i = 0; i < payload.length(); i++) {
            if (payload.charAt(i) == ZERO_GROUP) {
                zeroGroups++;
            }
        }
        int digits = payload.length() - zeroGroups;
        // Upper bound on the output size, computed as long so it cannot wrap around.
        long capacity = (long) zeroGroups * GROUP_BYTES
                + (long) (digits / GROUP_CHARS) * GROUP_BYTES
                + digits % GROUP_CHARS;
        if (capacity > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("The payload is too large to decode.");
        }

        byte[] out = new byte[(int) capacity];
        int written = 0;
        long group = 0;      // value of the group being accumulated (long: may exceed 2^31)
        int groupChars = 0;  // number of digits accumulated into 'group' so far

        for (int i = 0; i < payload.length(); i++) {
            char c = payload.charAt(i);
            if (c == ZERO_GROUP) {
                if (groupChars > 0) {
                    throw new IllegalArgumentException("The payload is not base 85 encoded.");
                }
                // 'out' is zero-initialised, so skipping ahead emits four zero bytes.
                written += GROUP_BYTES;
                continue;
            }
            if (c < FIRST_DIGIT || c > LAST_DIGIT) {
                throw new IllegalArgumentException(
                        "Invalid base 85 character (code point " + (int) c
                                + ") at position " + i + " of the whitespace-stripped payload.");
            }
            group = group * RADIX + (c - FIRST_DIGIT);
            groupChars++;
            if (groupChars == GROUP_CHARS) {
                written = writeGroup(group, GROUP_BYTES, out, written);
                group = 0;
                groupChars = 0;
            }
        }

        if (groupChars > 0) {
            if (groupChars == 1) {
                // One character carries fewer than 8 bits, so it cannot encode any byte.
                throw new IllegalArgumentException(
                        "The payload ends with a single dangling base 85 character.");
            }
            // Pad the short group with the highest digit; n characters decode to n - 1 bytes.
            for (int i = groupChars; i < GROUP_CHARS; i++) {
                group = group * RADIX + (LAST_DIGIT - FIRST_DIGIT);
            }
            written = writeGroup(group, groupChars - 1, out, written);
        }

        return Arrays.copyOf(out, written);
    }

    /**
     * Writes the leading {@code byteCount} bytes of a decoded group, most significant byte first.
     *
     * @param value the numeric value of the five-character group
     * @param byteCount how many of the four bytes to emit (1 to 4)
     * @param out destination array
     * @param offset index in {@code out} at which to start writing
     * @return the offset just past the last byte written
     * @throws IllegalArgumentException if {@code value} does not fit in 32 bits
     */
    private static int writeGroup(long value, int byteCount, byte[] out, int offset) {
        if (value > MAX_GROUP_VALUE) {
            throw new IllegalArgumentException(
                    "The payload contains a base 85 group that exceeds 32 bits.");
        }
        for (int k = 0; k < byteCount; k++) {
            out[offset + k] = (byte) (value >>> (24 - 8 * k));
        }
        return offset + byteCount;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I moved this to a repository: https://github.com/fzakaria/ascii85