Skip to content

Instantly share code, notes, and snippets.

@fzakaria
Created February 2, 2016 23:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fzakaria/95a714dcc8d06508524f to your computer and use it in GitHub Desktop.
Save fzakaria/95a714dcc8d06508524f to your computer and use it in GitHub Desktop.
A simple Base85 / Ascii85 codec
/**
 * A very simple utility class that decodes Ascii85 / Base85 text.
 *
 * <p>The dialect implemented here is closest to the Adobe version: digits are the
 * characters {@code '!'} (0) through {@code 'u'} (84), a lone {@code 'z'} stands for a
 * group of four zero bytes, and whitespace is ignored. The Adobe {@code <~} / {@code ~>}
 * delimiters are NOT handled; callers must remove them before decoding.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Ascii85">Ascii85</a>
 */
public class Base85 {

    /** Offset that maps base-85 digit 0 onto the first printable character, {@code '!'}. */
    private static final int ASCII_SHIFT = 33;

    /** Largest legal base-85 digit ({@code 'u' - ASCII_SHIFT}). */
    private static final int MAX_DIGIT = 84;

    /** Number of encoded characters in a full group. */
    private static final int CHUNK_CHARS = 5;

    /** Number of decoded bytes in a full group. */
    private static final int CHUNK_BYTES = 4;

    /** Largest value a group may decode to: it has to fit in 32 unsigned bits. */
    private static final long MAX_CHUNK_VALUE = 0xFFFFFFFFL;

    /** Powers of 85, indexed by exponent. */
    private static final long[] BASE85_POW = {
        1L,
        85L,
        85L * 85,
        85L * 85 * 85,
        85L * 85 * 85 * 85
    };

    private static final Pattern REMOVE_WHITESPACE = Pattern.compile("\\s+");

    private Base85() {
    }

    /**
     * Decodes base85 text into the bytes it represents.
     *
     * <p>Whitespace anywhere in the input is stripped first, so line-wrapped input is
     * accepted. The {@code 'z'} shortcut for an all-zero group is honoured, but only on a
     * group boundary. A trailing partial group of {@code k} digits (2 to 4) is padded with
     * {@code 'u'} and yields {@code k - 1} bytes. A lone trailing digit cannot carry a
     * whole byte and contributes nothing to the output.
     *
     * @see <a href="https://en.wikipedia.org/wiki/Ascii85">Ascii85</a>
     * @param chars The input characters that are base85 encoded.
     * @return The binary data decoded from the input
     * @throws IllegalArgumentException if the input is null or empty, contains a character
     *         outside {@code '!'..'u'} (other than {@code 'z'} and whitespace), has a
     *         {@code 'z'} inside a group, or contains a group whose value exceeds 2^32 - 1
     */
    static byte[] decode(String chars) {
        if (chars == null || chars.length() == 0) {
            throw new IllegalArgumentException("You must provide a non-zero length input");
        }
        // Whitespace may occur anywhere to accommodate line length limits, so strip it
        // before doing any size arithmetic.
        String encoded = REMOVE_WHITESPACE.matcher(chars).replaceAll("");
        int length = encoded.length();

        // Size the output exactly. Every 'z' expands to four bytes, so it must be counted
        // separately: sizing from the character count alone under-allocates for "z".
        long zeroGroups = 0;
        for (int i = 0; i < length; i++) {
            if (encoded.charAt(i) == 'z') {
                zeroGroups++;
            }
        }
        long digits = length - zeroGroups;
        // Five digits give four bytes; a trailing run of k digits gives floor(4k / 5) = k - 1.
        long decodedSize = zeroGroups * CHUNK_BYTES + digits * CHUNK_BYTES / CHUNK_CHARS;
        if (decodedSize > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("The decoded payload is too large to fit in a byte array.");
        }
        byte[] decoded = new byte[(int) decodedSize];
        int written = 0;

        byte[] chunk = new byte[CHUNK_CHARS];
        int chunkIndex = 0;
        for (int i = 0; i < length; i++) {
            char curr = encoded.charAt(i);
            if (curr == 'z') {
                // An all-zero group is written as a single "z" instead of "!!!!!";
                // it is only legal where a new group would start.
                if (chunkIndex > 0) {
                    throw new IllegalArgumentException("The payload is not base 85 encoded.");
                }
                // The output array is zero-initialised, so just skip over four bytes.
                written += CHUNK_BYTES;
                continue;
            }
            // Checking the char itself (not an ASCII-encoded byte) also rejects non-ASCII
            // input, which a charset conversion would have replaced with a legal '?'.
            if (curr < ASCII_SHIFT || curr > ASCII_SHIFT + MAX_DIGIT) {
                throw new IllegalArgumentException("The payload is not base 85 encoded.");
            }
            chunk[chunkIndex++] = (byte) curr;
            if (chunkIndex == CHUNK_CHARS) {
                System.arraycopy(decodeChunk(chunk), 0, decoded, written, CHUNK_BYTES);
                written += CHUNK_BYTES;
                chunkIndex = 0;
            }
        }

        // A trailing partial group is padded with the highest digit and the bytes that
        // only came from the padding are dropped again.
        if (chunkIndex > 1) {
            Arrays.fill(chunk, chunkIndex, CHUNK_CHARS, (byte) 'u');
            System.arraycopy(decodeChunk(chunk), 0, decoded, written, chunkIndex - 1);
        }
        return decoded;
    }

    /**
     * Converts one full group of five base-85 digits into its four big-endian bytes.
     *
     * @param chunk exactly five characters, each in {@code '!'..'u'}
     * @return the four decoded bytes, most significant first
     * @throws IllegalArgumentException if the chunk is not five characters long or its
     *         value does not fit in 32 bits
     */
    private static byte[] decodeChunk(byte[] chunk) {
        if (chunk.length != CHUNK_CHARS) {
            throw new IllegalArgumentException("You can only decode chunks of size 5.");
        }
        // Accumulate in a long: the largest group ("uuuuu") exceeds the 32-bit range and
        // has to be detected rather than silently wrapped.
        long value = 0;
        for (int i = 0; i < CHUNK_CHARS; i++) {
            value += (chunk[i] - ASCII_SHIFT) * BASE85_POW[CHUNK_CHARS - 1 - i];
        }
        if (value > MAX_CHUNK_VALUE) {
            throw new IllegalArgumentException("The payload is not base 85 encoded.");
        }
        return new byte[] {
            (byte) (value >>> 24),
            (byte) (value >>> 16),
            (byte) (value >>> 8),
            (byte) value
        };
    }
}
@fzakaria
Copy link
Author

fzakaria commented Jun 4, 2018

I moved this to a repository: https://github.com/fzakaria/ascii85

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment