Skip to content

Instantly share code, notes, and snippets.

@bennadel
Created September 26, 2014 14:39
Using Base32 Encoding And Decoding In ColdFusion
<cfscript>
// NOTE: CFScript used for Gist color coding. Can be removed.
component
output = false
hint = "I provide encoding and decoding methods for Base32 values."
{
/**
* I server no purpose since the methods on this component are "static".
*
* @output false
*/
public any function init() {
return( this );
}
// ---
// STATIC METHODS.
// ---
/**
* I decode the given Base32-encoded string value.
*
* @output false
* @hint The input string is assumed to be utf-8.
*/
public string function decode( required string input ) {
var binaryOutput = decodeBinary( charsetDecode( input, "utf-8" ) );
return( charsetEncode( binaryOutput, "utf-8" ) );
}
/**
* I decode the given Base32-encoded binary value.
*
* @output false
*/
public binary function decodeBinary( required binary input ) {
// I map the encoded bytes onto the original 5-bit partial input bytes.
var decodingMap = getDecodingMap();
// I hold the intermediary, decoded bytes.
var buffer = getAllocatedDecodingBuffer( input );
// The input maybe be padded with "=" to make sure that the value is evenly
// divisible by 8 (to make the length of data more predictable). Once we hit
// this byte (if it exists), we have consumed all of the encoded data.
var terminatingByte = asc( "=" );
// I help zero-out the parts of the byte that were not discarded.
var rightMostBits = [
inputBaseN( "1", 2 ),
inputBaseN( "11", 2 ),
inputBaseN( "111", 2 ),
inputBaseN( "1111", 2 )
];
// As we loop over the encoded bytes, we may have to build up each decoded byte
// across multiple input bytes. This will help us keep track of how many more
// bits we need to complete the pending byte.
var decodedByte = 0;
var bitsNeeded = 8;
// Decode each input byte.
for ( var byte in input ) {
// If we hit the EOF byte, there's nothing more to process.
if ( byte == terminatingByte ) {
break;
}
// Get the original 5-bit input that was encoded.
var partialByte = decodingMap[ byte ];
// If we need more than 5 bits, we can consume the given value in it's
// entirety without actually filling the pending bit.
if ( bitsNeeded > 5 ) {
// Push the incoming 5-bits onto the end of the pending byte.
decodedByte = bitOr( bitSHLN( decodedByte, 5 ), partialByte );
bitsNeeded -= 5;
// If we need exactly 5 more bits, we can use the given value to complete
// the pending bit.
} else if ( bitsNeeded == 5 ) {
// Push the incoming 5-bits onto the end of the pending byte.
decodedByte = bitOr( bitSHLN( decodedByte, 5 ), partialByte );
// At this point, the pending byte is complete.
buffer.put( decodedByte );
decodedByte = 0;
bitsNeeded = 8;
// If we need between 1 and 4 bits, we have to consume the given value
// across, two different pending bytes since it won't fit entirely into the
// currently-pending byte (the leading bits complete the currently-pending
// byte, then the trailing bits start the next pending byte).
} else {
var discardedCount = ( 5 - bitsNeeded );
// Push only the leading bits onto the end of the pending byte.
decodedByte = bitOr(
bitSHLN( decodedByte, bitsNeeded ),
bitSHRN( partialByte, discardedCount )
);
// At this point, the pending byte is complete.
buffer.put( decodedByte );
// Start the next pending byte with the trailing bits that we discarded
// in the last operation.
decodedByte = bitAnd( partialByte, rightMostBits[ discardedCount ] );
bitsNeeded = ( 8 - discardedCount );
}
// NOTE: We will never need an ELSE case that requiers zero bits to complete
// the pending byte. Since each case that can result in a completed byte
// (need 5 bits (1) or less than 5 bits (2)) already push a byte on to the
// result, we will never complete a byte without pushing it onto the output.
}
// Return the result as a binary value.
return( buffer.array() );
}
/**
* I encode the given string value using Base32 encoding.
*
* @output false
* @hint The input string is assumed to be utf-8.
*/
public string function encode( required string input ) {
var binaryOutput = encodeBinary( charsetDecode( input, "utf-8" ) );
return( charsetEncode( binaryOutput, "utf-8" ) );
}
/**
* I encode the given binary value using Base32 encoding.
*
* @output false
*/
public binary function encodeBinary( required binary input ) {
// I map the 5-bit input chunks to the base32-encoding bytes.
var encodingMap = getEncodingMap();
// Base32-encoded strings must be divisible by 8 (so that the length of the data
// is more predictable). We'll pad the null characters with "=".
var paddingByte = asc( "=" );
// I hold the intermediary, encoded bytes.
var buffer = getAllocatedEncodingBuffer( input );
// In order to iterate over the input bits more easily, we'll wrap it in a
// BigInteger instance - this allows us to check individual bits without having
// to calculate the offset across multiple bytes.
var inputWrapper = createObject( "java", "java.math.BigInteger" ).init( input );
// Since BigInteger will not take leading zeros into account, we have to
// explicitly calculate the number of input bits based on the number of input
// bytes.
var bitCount = ( arrayLen( input ) * 8 );
// Since each encoded chunk uses 5 bits, which may not divide evenly into a
// set of 8-bit bytes, we need to normalize the input. Let's sanitize the input
// wrapper to be evenly divisible by 5 (by pushing zeros onto the end). This
// way, we never have to worry about reading an incomplete chunk of bits from
// the underlying data.
if ( bitCount % 5 ) {
var missingBitCount = ( 5 - ( bitCount % 5 ) );
inputWrapper = inputWrapper.shiftLeft( javaCast( "int", missingBitCount ) );
bitCount += missingBitCount;
}
// Now that we have know that our input bit count is evenly divisible by 5,
// we can loop over the input in increments of 5 to read one decoded partial
// byte at a time.
// --
// NOTE: We are starting a bitCount-1 since the bits are zero-indexed.
for ( var chunkOffset = ( bitCount - 1 ) ; chunkOffset > 0 ; chunkOffset -= 5 ) {
var partialByte = 0;
// Read 5 bits into the partial byte, starting at the chunk offset.
for ( var i = chunkOffset ; i > ( chunkOffset - 5 ) ; i-- ) {
// Implicit read of "0" bit into the right-most bit of the partial byte.
partialByte = bitSHLN( partialByte, 1 );
// If the underlying input bit is "1", update the partial byte.
if ( inputWrapper.testBit( javaCast( "int", i ) ) ) {
partialByte = bitOr( partialByte, 1 );
}
}
// At this point, the partial byte value is a number that refers to the
// index of the encoded value in the base32 character-set. Push the mapped
// characterByte onto the buffer.
buffer.put( encodingMap[ partialByte ] );
}
// If the number of chunks isn't divisible by 8, we need to pad the result.
while ( buffer.remaining() ) {
buffer.put( paddingByte );
}
// Return the result as a binary value.
return( buffer.array() );
}
// ---
// PRIVATE METHODS.
// ---
/**
* I provide a pre-allocated ByteBuffer that will store the output string value
* during the decoding process. This takes into account the possible padding of the
* input and will discard padding characters during buffer allocation.
*
* @output false
*/
private any function getAllocatedDecodingBuffer( required binary input ) {
var paddingByte = asc( "=" );
var inputLength = arrayLen( input );
// When allocating the output buffer, we don't want to take the padding
// characters into account. Decrement the input length until we hit our first
// trailing non-padding character.
while ( input[ inputLength ] == paddingByte ) {
inputLength--;
}
// Since each encoded byte reprsents only 5-bits of the encoded input, we know
// that we know that the total number of meaningful input bits is *5. But then,
// that has to represent full, 8-bit bytes.
var totalOutputBytes = fix( inputLength * 5 / 8 );
return(
createObject( "java", "java.nio.ByteBuffer" ).allocate(
javaCast( "int", totalOutputBytes )
)
);
}
/**
* I provide a pre-allocated ByteBuffer that will store the base32 value during the
* encoding process. This takes into account the possible need to pad the final output
* and will be allocated to the exact length needed for encoding.
*
* @output false
*/
private any function getAllocatedEncodingBuffer( required binary input ) {
// Each 5-bits of the input bytes will represent a byte in the encoded value. As
// such, we know that the output will required the total number of bits (n * 8)
// divided by 5-bits (for each output byte).
var totalOutputBytes = ceiling( arrayLen( input ) * 8 / 5 );
// The length of the output has to be evenly divisible by 8; as such, if it is
// not, we have to account of the trailing padding ("=") characters.
if ( totalOutputBytes % 8 ) {
totalOutputBytes += ( 8 - ( totalOutputBytes % 8 ) );
}
return(
createObject( "java", "java.nio.ByteBuffer" ).allocate(
javaCast( "int", totalOutputBytes )
)
);
}
/**
* I return an array of character-bytes used to represent the set of possible Base32
* encoding values.
*
* @output false
* @hint This returns a Java array, not a ColdFusion array.
*/
private array function getBase32Bytes() {
return( javaCast( "string", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" ).getBytes() );
}
/**
* I return a struct that maps Base32 input characters to the bytes that are
* used in the encoding process.
*
* Example: map[ encodedByte ] => byte.
*
* @output false
*/
private struct function getDecodingMap() {
var byteMap = {};
var bytes = getBase32Bytes();
var byteLength = arrayLen( bytes );
for ( var i = 1 ; i <= byteLength ; i++ ) {
byteMap[ bytes[ i ] ] = ( i - 1 );
}
return( byteMap );
}
/**
* I return a struct that maps input bytes to the characters that are used
* to encode in Base32.
*
* Example: map[ byte ] => encodedByte.
*
* @output false
*/
private struct function getEncodingMap() {
var byteMap = {};
var bytes = getBase32Bytes();
var byteLength = arrayLen( bytes );
for ( var i = 1 ; i <= byteLength ; i++ ) {
byteMap[ i - 1 ] = bytes[ i ];
}
return( byteMap );
}
}
</cfscript>
<cfscript>
// Set up our known Base32-encoded values.
tests = {
"C" = "IM======",
"Co" = "INXQ====",
"Com" = "INXW2===",
"Come" = "INXW2ZI=",
"Come " = "INXW2ZJA",
"Come w" = "INXW2ZJAO4======",
"Come wi" = "INXW2ZJAO5UQ====",
"Come wit" = "INXW2ZJAO5UXI===",
"Come with" = "INXW2ZJAO5UXI2A=",
"Come with " = "INXW2ZJAO5UXI2BA",
"Come with m" = "INXW2ZJAO5UXI2BANU======",
"Come with me" = "INXW2ZJAO5UXI2BANVSQ====",
"Come with me " = "INXW2ZJAO5UXI2BANVSSA===",
"Come with me i" = "INXW2ZJAO5UXI2BANVSSA2I=",
"Come with me if" = "INXW2ZJAO5UXI2BANVSSA2LG",
"Come with me if " = "INXW2ZJAO5UXI2BANVSSA2LGEA======",
"Come with me if y" = "INXW2ZJAO5UXI2BANVSSA2LGEB4Q====",
"Come with me if yo" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W6===",
"Come with me if you" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65I=",
"Come with me if you " = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JA",
"Come with me if you w" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO4======",
"Come with me if you wa" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QQ====",
"Come with me if you wan" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW4===",
"Come with me if you want" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45A=",
"Come with me if you want " = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BA",
"Come with me if you want t" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAOQ======",
"Come with me if you want to" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXQ====",
"Come with me if you want to " = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA===",
"Come with me if you want to l" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA3A=",
"Come with me if you want to li" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA3DJ",
"Come with me if you want to liv" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA3DJOY======",
"Come with me if you want to live" = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA3DJOZSQ====",
"Come with me if you want to live." = "INXW2ZJAO5UXI2BANVSSA2LGEB4W65JAO5QW45BAORXSA3DJOZSS4==="
};
// Instantiate our base32 encoder - the encode/decode methods are static.
base32 = new Base32();
testInputs = structKeyArray( tests );
arraySort( testInputs, "text", "asc" );
for ( input in testInputs ) {
// Encode the value.
encodedInput = base32.encode( input );
// Decode the encoded-value.
decodedOutput = base32.decode( encodedInput );
// Check to see if the process worked in both directions.
encodingPassed = ( encodedInput == tests[ input ] );
decodingPassed = ( input == decodedOutput );
// Output test results for this test.
writeOutput( ( encodingPassed && decodingPassed ) ? "[PASS]" : "[FAIL]" );
writeOutput( " #input# >> #encodedInput# >> #decodedOutput# <br />" );
}
</cfscript>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment