Created
June 25, 2013 23:32
Detecting File Type Using Magic Numbers In ColdFusion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<cfscript> | |
// I determine if the given file contains the given hex signature at the
// given byte offset. We can use this to loosely determine the file type
// of the given file.
//
// filepath        - Path of the file to inspect. It is handed directly to
//                   java.io.FileInputStream; a failure to open the file is
//                   NOT caught here and propagates to the caller.
// hexSignature    - Expected bytes, written as hex (two characters per
//                   byte). Matching is case-insensitive.
// signatureOffset - Number of leading bytes to skip before the signature
//                   is read (defaults to 0).
//
// Returns true only when the file holds at least
// ( signatureOffset + signature length ) bytes and the bytes at the offset
// equal the signature. Returns false for an empty or odd-length signature,
// for a file that is too short, and for any error raised while reading.
boolean function startsWithHexSignature(
	required string filepath,
	required string hexSignature,
	numeric signatureOffset = 0
	) {

	// The byte signature is half the length of the hex signature since a
	// single byte is represented by two hex characters. An empty or
	// odd-length hex string cannot describe a whole number of bytes, so it
	// can never match.
	var hexLength = len( hexSignature );

	if ( ! hexLength || ( hexLength % 2 ) ) {

		return( false );

	}

	var signatureLength = ( hexLength / 2 );

	// I am the byte buffer into which bytes will be streamed.
	var byteBuffer = [];

	arraySet( byteBuffer, 1, signatureLength, 0 );

	// Convert the ColdFusion array to a Java byte array so that the input
	// stream will work with it.
	byteBuffer = javaCast( "byte[]", byteBuffer );

	// Open an input stream to the file. This stays outside of the try
	// block so that the finally block never sees an undefined stream.
	var inputStream = createObject( "java", "java.io.FileInputStream" ).init(
		javaCast( "string", filepath )
	);

	try {

		// If the signature is offset, read off the bytes that we want to
		// ignore. A negative read result means the file ended before the
		// offset was reached, so the signature cannot be present.
		for ( var i = 0 ; i < signatureOffset ; i++ ) {

			if ( inputStream.read() < 0 ) {

				return( false );

			}

		}

		// Read off the signature bytes. A single read() call is allowed to
		// return fewer bytes than requested, so keep reading until the
		// buffer is full. Hitting the end of the file first means the file
		// is too short to hold the signature; without this check the
		// zero-filled remainder of the buffer could produce a false match
		// against a signature that ends in 00 bytes.
		var bytesRead = 0;
		var chunkSize = 0;

		while ( bytesRead < signatureLength ) {

			chunkSize = inputStream.read(
				byteBuffer,
				javaCast( "int", bytesRead ),
				javaCast( "int", signatureLength - bytesRead )
			);

			if ( chunkSize < 0 ) {

				return( false );

			}

			bytesRead += chunkSize;

		}

		// Compare as STRINGS. The == operator may compare numeric-looking
		// values as numbers, which would let different hex strings such
		// as "1E02" and "0100" be treated as equal. compareNoCase() keeps
		// the comparison case-insensitive.
		return( compareNoCase( binaryEncode( byteBuffer, "hex" ), hexSignature ) == 0 );

	} catch ( any ioError ) {

		// If anything went wrong with the input stream read, then we'll
		// assume the signatures don't match.
		return( false );

	} finally {

		// No matter what happens, clean up the file.
		inputStream.close();

	}

}
// ------------------------------------------------------ // | |
// ------------------------------------------------------ // | |
// ------------------------------------------------------ // | |
// ------------------------------------------------------ // | |
// Test fixtures: each entry names a file in the ./tests/ directory, a hex
// signature to look for, and the byte offset at which to look for it.
// NOTE: The signatures were gathered across different sites, which is why
// a given file may be listed several times with different candidates.
tests = [
	// file.ai
	{ filename = "file.ai", hexSignature = "25504446", offset = 0 },

	// file.avi
	{ filename = "file.avi", hexSignature = "52494646", offset = 0 },
	{ filename = "file.avi", hexSignature = "415649204C495354", offset = 0 },

	// file.gif
	{ filename = "file.gif", hexSignature = "474946383761", offset = 0 },
	{ filename = "file.gif", hexSignature = "474946383961", offset = 0 },

	// file.jpg
	{ filename = "file.jpg", hexSignature = "FFD8FFE0", offset = 0 },
	{ filename = "file.jpg", hexSignature = "494600", offset = 0 },
	{ filename = "file.jpg", hexSignature = "FFD8FFE1", offset = 0 },

	// file.mov
	{ filename = "file.mov", hexSignature = "0000001466747970", offset = 0 },
	{ filename = "file.mov", hexSignature = "6D6F6F76", offset = 0 },
	{ filename = "file.mov", hexSignature = "71742020", offset = 4 },
	{ filename = "file.mov", hexSignature = "66726565", offset = 4 },
	{ filename = "file.mov", hexSignature = "6D646174", offset = 4 },
	{ filename = "file.mov", hexSignature = "77696465", offset = 4 },
	{ filename = "file.mov", hexSignature = "706E6F74", offset = 4 },
	{ filename = "file.mov", hexSignature = "736B6970", offset = 4 },

	// file.mp3
	{ filename = "file.mp3", hexSignature = "FFFB", offset = 0 },
	{ filename = "file.mp3", hexSignature = "494433", offset = 0 },

	// file.mp4
	{ filename = "file.mp4", hexSignature = "0000001866747970", offset = 0 },
	{ filename = "file.mp4", hexSignature = "33677035", offset = 0 },

	// file.pdf
	{ filename = "file.pdf", hexSignature = "25504446", offset = 0 }, // <-- NOTE: Same as AI.

	// file.png
	{ filename = "file.png", hexSignature = "89504E470D0A1A0A", offset = 0 },

	// file.psd
	// NOTE(review): 4F676753 is the ASCII text "OggS" - confirm that this
	// candidate really belongs with the PSD file.
	{ filename = "file.psd", hexSignature = "4F676753", offset = 0 },
	{ filename = "file.psd", hexSignature = "38425053", offset = 0 },

	// file.xlsx
	{ filename = "file.xlsx", hexSignature = "504B0304", offset = 0 }, // <-- NOTE: Same as ZIP.

	// file.zip
	{ filename = "file.zip", hexSignature = "504B0304", offset = 0 },
	{ filename = "file.zip", hexSignature = "504B0506", offset = 0 },
	{ filename = "file.zip", hexSignature = "504B0708", offset = 0 },
	{ filename = "file.zip", hexSignature = "504B4C495445", offset = 30 },
	{ filename = "file.zip", hexSignature = "504B537058", offset = 526 },
	{ filename = "file.zip", hexSignature = "57696E5A6970", offset = 29 },
	{ filename = "file.zip", hexSignature = "57696E5A6970", offset = 152 },
	{ filename = "file.zip", hexSignature = "1F8B08", offset = 0 }
];

// Run every fixture through the signature check and print one line per
// fixture: the file name, the signature that was tried, and the result.
for ( test in tests ) {

	// The label is written before the check runs.
	writeOutput( test.filename & " [ " & test.hexSignature & " ] : " );

	writeOutput(
		startsWithHexSignature(
			expandPath( "./tests/" & test.filename ),
			test.hexSignature,
			test.offset
		)
	);

	writeOutput( "<br />" );

}
</cfscript> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment