Created
September 26, 2016 11:58
-
-
Save bennadel/02d5ec9f759ef2dc9939a53c0919214e to your computer and use it in GitHub Desktop.
Running MySQL Compress() And Uncompress() Compatible Methods In ColdFusion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
component
	output = false
	hint = "I provide MySQL compatible compress() and uncompress() methods."
	{

	/**
	* I initialize the compressor service.
	*
	* @bufferSize I am the size (in bytes) of the working buffer used during the compression actions.
	* @output false
	*/
	public any function init( numeric bufferSize = 1024 ) {

		// A zero-length (or negative) buffer can never receive any deflated or inflated
		// bytes, which would make the pump loops produce empty output. Fail fast
		// instead of silently losing data later.
		if ( bufferSize < 1 ) {

			throw(
				type = "MySqlCompressor.InvalidBufferSize",
				message = "The buffer size must be a positive number.",
				detail = "The provided buffer size was [#bufferSize#]."
			);

		}

		// This value acts as a default for the buffer size and can be overridden in
		// the individual inflation and deflation method invocations.
		variables.DEFAULT_BUFFER_SIZE = bufferSize;

		return( this );

	}


	// ---
	// PUBLIC METHODS.
	// ---


	/**
	* I compress the given string, returning the MySQL representation of the compressed
	* binary. The binary value contains both the original length of the input (4 bytes,
	* low-byte first) and the ZLIB-compressed data, which is how MySQL stores it.
	*
	* @input I am the string value being compressed.
	* @output false
	*/
	public binary function compress( required string input ) {

		// If the input is empty, return an empty binary - we don't bother storing the
		// length for empty strings.
		if ( ! len( input ) ) {

			return( binaryNew( 0 ) );

		}

		// The stored length must be the number of BYTES in the uncompressed payload,
		// not the number of characters - the two differ for any non-ASCII input. The
		// payload is the UTF-8 encoding of the string, so measure that.
		var inputBytes = charsetDecode( input, "utf-8" );
		var lengthBytes = lengthToBinary( arrayLen( inputBytes ) );
		var compressedBytes = zlibCompressBytes( inputBytes );

		// MySQL appends a "." (byte 46) when the compressed result ends with a space
		// (byte 32) so that the value survives end-space trimming in CHAR / VARCHAR
		// columns. Mirror that so the output matches MySQL byte-for-byte. The extra
		// byte sits after the end of the ZLIB stream and is ignored when inflating.
		if ( compressedBytes[ arrayLen( compressedBytes ) ] == 32 ) {

			compressedBytes = binaryConcat( compressedBytes, binaryNew( 1, 46 ) );

		}

		return( binaryConcat( lengthBytes, compressedBytes ) );

	}


	/**
	* I extract the compressed bytes from the given MySQL representation of the
	* compressed value (which combines both the length of the original input and the
	* compressed data). An empty input (the representation of an empty string) yields
	* an empty binary. A non-empty input shorter than 5 bytes is malformed and causes
	* the underlying slice operation to raise an error.
	*
	* @input I am the MySQL representation of the compressed value.
	* @output false
	*/
	public binary function getCompressedBytes( required binary input ) {

		var totalLength = arrayLen( input );

		if ( ! totalLength ) {

			return( binaryNew( 0 ) );

		}

		// Slice off bytes 5 -> N. The first 4 bytes are the length header, which
		// leaves ( N - 4 ) bytes of compressed data.
		return( binarySlice( input, 5, ( totalLength - 4 ) ) );

	}


	/**
	* I extract the original-length bytes from the given MySQL representation of the
	* compressed value (which combines both the length of the original input and the
	* compressed data). An empty input (the representation of an empty string) yields
	* an empty binary.
	*
	* @input I am the MySQL representation of the compressed value.
	* @output false
	*/
	public binary function getLengthBytes( required binary input ) {

		if ( ! arrayLen( input ) ) {

			return( binaryNew( 0 ) );

		}

		// Slice off bytes 1 -> 4.
		return( binarySlice( input, 1, 4 ) );

	}


	/**
	* I uncompress the given MySQL representation of the compressed value, returning
	* the original input string.
	*
	* @input I am the MySQL binary representation of the compressed value.
	* @output false
	*/
	public string function uncompress( required binary input ) {

		// If the input binary is an empty byte-array, it means that the original input
		// was an empty string. Just return the empty string.
		if ( ! arrayLen( input ) ) {

			return( "" );

		}

		return( zlibUncompress( getCompressedBytes( input ) ) );

	}


	/**
	* I return the length (as stored in the 4-byte header) of the original input for
	* the given MySQL representation of the compressed value.
	*
	* @input I am the MySQL binary representation of the compressed value.
	* @output false
	*/
	public numeric function uncompressedLength( required binary input ) {

		// If the input binary is an empty byte-array, it means that the original input
		// was an empty string. Just return zero.
		if ( ! arrayLen( input ) ) {

			return( 0 );

		}

		return( lengthFromBinary( getLengthBytes( input ) ) );

	}


	// ---
	// PRIVATE METHODS.
	// ---


	/**
	* I concatenate the given binary values (in order), returning the resultant binary.
	*
	* @binaryA I am the first binary value.
	* @binaryB I am the second binary value.
	* @output false
	*/
	private binary function binaryConcat(
		required binary binaryA,
		required binary binaryB
		) {

		// Pre-size the stream so that it never has to grow while we write to it.
		var combinedLength = ( arrayLen( binaryA ) + arrayLen( binaryB ) );

		var byteStream = createObject( "java", "java.io.ByteArrayOutputStream" )
			.init( javaCast( "int", combinedLength ) )
		;

		byteStream.write( binaryA );
		byteStream.write( binaryB );

		return( byteStream.toByteArray() );

	}


	/**
	* I create a new binary value (byte array) of the given length, filling it with the
	* given byte initializer.
	*
	* @length I am the length of the new binary value.
	* @fillByte I am the byte value used to fill the new byte array.
	* @output false
	*/
	private binary function binaryNew(
		required numeric length,
		numeric fillByte = 0
		) {

		var bytes = [];

		// A zero length leaves the array empty, which casts to an empty byte array.
		if ( length ) {

			arrayResize( bytes, length );
			arraySet( bytes, 1, length, fillByte );

		}

		return( javaCast( "byte[]", bytes ) );

	}


	/**
	* I reverse the given binary value (byte array), returning a new binary value.
	*
	* @input I am the binary value being reversed.
	* @output false
	*/
	private binary function binaryReverse( required binary input ) {

		var result = [];
		var resultLength = arrayLen( input );

		arrayResize( result, resultLength );

		// Copy each byte into its mirrored (one-based) position.
		for ( var i = 1 ; i <= resultLength ; i++ ) {

			result[ resultLength - i + 1 ] = input[ i ];

		}

		return( javaCast( "byte[]", result ) );

	}


	/**
	* I slice out a portion from the given binary value, returning a new binary value.
	*
	* @input I am the binary value (byte array) being sliced.
	* @index I am the ONE-BASED index at which to start slicing.
	* @length I am the number of bytes to slice.
	* @output false
	*/
	private binary function binarySlice(
		required binary input,
		required numeric index,
		required numeric length
		) {

		return( javaCast( "byte[]", arraySlice( input, index, length ) ) );

	}


	/**
	* I convert the given binary value (high-byte first) to a number.
	*
	* @input I am the binary value representation of a number.
	* @output false
	*/
	private numeric function binaryToInt( required binary input ) {

		var result = createObject( "java", "java.math.BigInteger" )
			.init( input )
			.intValue()
		;

		return( result );

	}


	/**
	* I convert the given number to a binary value (high-byte first, using as few
	* bytes as BigInteger needs to represent the value).
	*
	* @input I am the number being converted to a binary representation.
	* @output false
	*/
	private binary function intToBinary( required numeric input ) {

		var result = createObject( "java", "java.math.BigInteger" )
			.valueOf( javaCast( "int", input ) )
			.toByteArray()
		;

		return( result );

	}


	/**
	* I get the length of the original input based on the given length bytes extracted
	* from the MySQL representation.
	*
	* @input I am the length bytes portion of the MySQL representation.
	* @output false
	*/
	private numeric function lengthFromBinary( required binary input ) {

		// MySQL stores the length bytes in a low-byte-first order. As such, we have to
		// reverse the bytes before converting them to a number representation.
		return( binaryToInt( binaryReverse( input ) ) );

	}


	/**
	* I get the 4-byte binary representation of the given length for use with the
	* compressed MySQL representation.
	*
	* @length I am the length for which we are getting bytes.
	* @output false
	*/
	private binary function lengthToBinary( required numeric length ) {

		// MySQL stores the length bytes in a low-byte-first order. As such, we have to
		// reverse the byte representation of the given number.
		var lengthBytes = binaryReverse( intToBinary( length ) );

		// The reversed value may be shorter than 4 bytes; pad the high end with
		// zero bytes so that there are always at least 4 bytes to take.
		var bytesForStorage = binaryConcat( lengthBytes, binaryNew( 4 ) );

		// Return the first 4-bytes.
		return( binarySlice( bytesForStorage, 1, 4 ) );

	}


	/**
	* I compress the UTF-8 encoding of the given string using the ZLIB compression
	* library.
	*
	* @input I am the string being deflated.
	* @bufferSize I am the size of the buffer to use while deflating.
	* @output false
	*/
	private binary function zlibCompress(
		required string input,
		numeric bufferSize = DEFAULT_BUFFER_SIZE
		) {

		return( zlibCompressBytes( charsetDecode( input, "utf-8" ), bufferSize ) );

	}


	/**
	* I compress the given bytes using the ZLIB compression library.
	*
	* @input I am the binary value being deflated.
	* @bufferSize I am the size of the buffer to use while deflating.
	* @output false
	*/
	private binary function zlibCompressBytes(
		required binary input,
		numeric bufferSize = DEFAULT_BUFFER_SIZE
		) {

		var buffer = binaryNew( bufferSize );
		var byteStream = createObject( "java", "java.io.ByteArrayOutputStream" ).init();
		var deflater = createObject( "java", "java.util.zip.Deflater" ).init();

		// The deflater holds native memory; make sure it is released even if one of
		// the calls below throws.
		try {

			deflater.setInput( input );
			deflater.finish();

			var byteCount = deflater.deflate( buffer );

			// Continue to pump the deflated bytes into the byte stream while the
			// deflater is still producing output.
			while ( byteCount ) {

				byteStream.write( buffer, javaCast( "int", 0 ), javaCast( "int", byteCount ) );
				byteCount = deflater.deflate( buffer );

			}

		} finally {

			deflater.end();

		}

		return( byteStream.toByteArray() );

	}


	/**
	* I uncompress the given binary using the ZLIB compression library, returning the
	* inflated bytes decoded as a UTF-8 string. If the compressed stream ends before
	* it is complete (truncated data), I throw an error rather than return a partial
	* value.
	*
	* @input I am the binary value being inflated.
	* @bufferSize I am the size of the buffer to use while inflating.
	* @output false
	*/
	private string function zlibUncompress(
		required binary input,
		numeric bufferSize = DEFAULT_BUFFER_SIZE
		) {

		var outputBuffer = binaryNew( bufferSize );
		var byteStream = createObject( "java", "java.io.ByteArrayOutputStream" ).init();
		var inflater = createObject( "java", "java.util.zip.Inflater" ).init();

		// The inflater holds native memory; make sure it is released even if the data
		// turns out to be malformed and inflate() throws.
		try {

			inflater.setInput( input );

			var byteCount = inflater.inflate( outputBuffer );

			// Continue to pump the inflated bytes into the byte stream while the
			// inflater is still producing output.
			while ( byteCount ) {

				byteStream.write( outputBuffer, javaCast( "int", 0 ), javaCast( "int", byteCount ) );
				byteCount = inflater.inflate( outputBuffer );

			}

			// A zero byte-count means either that the stream has ended or that the
			// inflater ran out of input (or needs a dictionary) before the end of
			// the stream. The latter means the output is incomplete.
			if ( ! inflater.finished() ) {

				throw(
					type = "MySqlCompressor.IncompleteData",
					message = "The compressed data could not be fully inflated.",
					detail = "The ZLIB stream ended before its end-of-stream marker was reached."
				);

			}

		} finally {

			inflater.end();

		}

		return( byteStream.toString( javaCast( "string", "UTF-8" ) ) );

	}

}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!---
	Set up the test values that we will put through the ColdFusion-based compress() and
	uncompress() life-cycle.
	--
	CAUTION: According to the MySQL documentation, it appears that an extra "." may be
	placed at the end of the values to prevent space-trimming; however, I was never able
	to reproduce this edge-case. As such, these test values are not known to exercise
	that case.
--->
<cfset valueA = "" />
<cfset valueB = "." />
<cfset valueC = "Hello world!" />
<cfset valueD = " give me space " />
<cfset valueE = repeatString( " ", 2000 ) />
<cfset valueF = repeatString( "blam", 111222 ) />

<!--- Create the ColdFusion-based, MySQL-compatible compression component. --->
<cfset compressor = new MySqlCompressor() />

<!--- Use ColdFusion to COMPRESS the test values (creates binary values). --->
<cfset cfCompressedA = compressor.compress( valueA ) />
<cfset cfCompressedB = compressor.compress( valueB ) />
<cfset cfCompressedC = compressor.compress( valueC ) />
<cfset cfCompressedD = compressor.compress( valueD ) />
<cfset cfCompressedE = compressor.compress( valueE ) />
<cfset cfCompressedF = compressor.compress( valueF ) />

<!--- Use ColdFusion to UNCOMPRESS the test values. --->
<cfset cfUncompressedA = compressor.uncompress( cfCompressedA ) />
<cfset cfUncompressedB = compressor.uncompress( cfCompressedB ) />
<cfset cfUncompressedC = compressor.uncompress( cfCompressedC ) />
<cfset cfUncompressedD = compressor.uncompress( cfCompressedD ) />
<cfset cfUncompressedE = compressor.uncompress( cfCompressedE ) />
<cfset cfUncompressedF = compressor.uncompress( cfCompressedF ) />

<!---
	Assert that the full-life-cycle Uncompressed values match the original input
	values. This ensures that ColdFusion can figure out how to convert to and from the
	compressed format (from ColdFusion's perspective).
	--
	NOTE: We use compare() rather than NEQ because NEQ performs a case-insensitive,
	type-coercing comparison, which could hide a round-trip that altered the value.
--->
<cfif (
	( compare( valueA, cfUncompressedA ) neq 0 ) ||
	( compare( valueB, cfUncompressedB ) neq 0 ) ||
	( compare( valueC, cfUncompressedC ) neq 0 ) ||
	( compare( valueD, cfUncompressedD ) neq 0 ) ||
	( compare( valueE, cfUncompressedE ) neq 0 ) ||
	( compare( valueF, cfUncompressedF ) neq 0 )
	)>

	ColdFusion's local compression life-cycle failed.
	<cfabort />

</cfif>

<!---
	If we've made it this far, the ColdFusion code thinks that it can compress and
	uncompress values; however, we don't yet know if it is actually compatible with
	MySQL's COMPRESS() and UNCOMPRESS() functions. To test this, we'll let MySQL
	compress the same values and then compare the binaries.
--->
<cfquery name="mySql" datasource="testing">
	SELECT
		COMPRESS( <cfqueryparam value="#valueA#" cfsqltype="cf_sql_varchar" /> ) AS compressedA,
		COMPRESS( <cfqueryparam value="#valueB#" cfsqltype="cf_sql_varchar" /> ) AS compressedB,
		COMPRESS( <cfqueryparam value="#valueC#" cfsqltype="cf_sql_varchar" /> ) AS compressedC,
		COMPRESS( <cfqueryparam value="#valueD#" cfsqltype="cf_sql_varchar" /> ) AS compressedD,
		COMPRESS( <cfqueryparam value="#valueE#" cfsqltype="cf_sql_varchar" /> ) AS compressedE,
		COMPRESS( <cfqueryparam value="#valueF#" cfsqltype="cf_sql_varchar" /> ) AS compressedF
	;
</cfquery>

<!---
	MySQL has returned Blob (binary) values. Now, we just need to see if the binary
	values returned by MySQL match the binary values returned by the ColdFusion-based
	compressor. Since we can't compare the binary values directly, we'll compare the
	HEX-encoding of both sets of values.
--->
<cfif (
	( binaryEncode( cfCompressedA, "hex" ) neq binaryEncode( mySql.compressedA, "hex" ) ) ||
	( binaryEncode( cfCompressedB, "hex" ) neq binaryEncode( mySql.compressedB, "hex" ) ) ||
	( binaryEncode( cfCompressedC, "hex" ) neq binaryEncode( mySql.compressedC, "hex" ) ) ||
	( binaryEncode( cfCompressedD, "hex" ) neq binaryEncode( mySql.compressedD, "hex" ) ) ||
	( binaryEncode( cfCompressedE, "hex" ) neq binaryEncode( mySql.compressedE, "hex" ) ) ||
	( binaryEncode( cfCompressedF, "hex" ) neq binaryEncode( mySql.compressedF, "hex" ) )
	)>

	ColdFusion blobs DO NOT MATCH MySQL blobs.
	<cfabort />

</cfif>

<!---
	If we made it this far, we know that the compressed binaries produced by ColdFusion
	and MySQL match. This proves that they compress with compatible algorithms. And,
	since we also know that ColdFusion can uncompress its own binaries, it follows
	logically that it should be able to uncompress in a way that is compatible with
	MySQL as well.
--->
Done! All values match! All life-cycles are compatible.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment