Created
July 11, 2011 13:05
-
-
Save treffynnon/1075789 to your computer and use it in GitHub Desktop.
A prototype/alpha pure PHP wrapper for the ssdeep binary (tested with version 2.4).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Some examples of how to use this prototype. I highly recommend that you use
 * the PHP PECL extension I wrote instead though. It is better in every way.
 * For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
 *
 * The SsDeep and SsDeepWrapper classes must be loaded before this script runs.
 */

// The file to hash and to compare against the known hashes. It has to be
// defined before either example below uses it.
$file = '/path/to/your/file.ext';

// Get a hash via a supplied file path
$SsDeep = new SsDeep();
$hash_string = $SsDeep->getHash($file);

// Find matches and near matches for a file.
// An array of hashes that you have previously generated and perhaps
// stored in a database as I did. Each entry is array(hash, file name).
$array_of_hashes = array(
    array(
        '384:fPqFB9Pg3x0bW4N6l7LTLCdLY7q4YDEhrnMm4gQJHSgY9nwE5Bhgi:6FG6bi7HOdc7q4Y4TMmiJHSgYu8j',
        '/file/name/here',
    ),
);

$SsDeep = new SsDeep();
$SsDeep->setHashes($array_of_hashes);

// false when the temporary hash file could not be written, otherwise a list
// of array('filename' => ..., 'percentage' => ...) entries.
$matches = $SsDeep->checkMatches($file);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Front end for the ssdeep command line tool.
 *
 * Keeps a list of known hashes in ssdeep's CSV hash-file format, writes that
 * list to a temporary file on demand and delegates the actual hashing and
 * matching to SsDeepWrapper.
 *
 * I highly recommend that you use
 * the PHP PECL extension I wrote instead though. It is better in every way.
 * For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
 *
 * @author Simon Holywell <treffynnon@php.net>
 * @version 7.5.2010 prototype
 */
class SsDeep {
    /**
     * ssdeep hash file header format string
     * @var string
     */
    private $hashFileHeader = 'ssdeep,1.0--blocksize:hash:hash,filename';

    /**
     * The temporary hash list file name ('' while no temporary file exists)
     * @var string
     */
    private $tempFileName = '';

    /**
     * The contents of the hash file (one `hash,"filename"` line per entry)
     * @var string
     */
    private $hashes = '';

    /**
     * Check for matches using ssdeep
     *
     * Writes the stored hashes to a temporary file, asks SsDeepWrapper to
     * compare $source_file against it and always removes the temporary file
     * again, even when the wrapper throws.
     *
     * @param string $source_file path of the file to compare
     * @return boolean|array false when the hash file could not be written,
     *                       otherwise whatever SsDeepWrapper::checkMatches() returns
     */
    public function checkMatches($source_file) {
        $hash_file = $this->writeHashFile();
        if (!$hash_file) {
            return false;
        }

        try {
            $ssdeep = new SsDeepWrapper();
            return $ssdeep->checkMatches($source_file, $hash_file);
        } finally {
            // Runs on both the normal and the exceptional path, so the
            // temporary file is never left behind.
            $this->removeHashFile();
        }
    }

    /**
     * Store the known hashes, converting them to the ssdeep file format
     *
     * Entries that are not arrays holding both a hash (index 0) and a file
     * name (index 1) are skipped. A non-array argument clears the list.
     *
     * @example $this->setHashes(
     *     array(
     *         array(
     *             '384:fPqFB9Pg3x0bW4N6l7LTLCdLY7q4YDEhrnMm4gQJHSgY9nwE5Bhgi:6FG6bi7HOdc7q4Y4TMmiJHSgYu8j',
     *             '5'
     *         )
     *     ));
     * @param array $details list of array(hash, file name) pairs
     * @return string the hash lines that were stored (without the file header)
     */
    public function setHashes($details) {
        $output = '';

        if (is_array($details)) {
            foreach ($details as $result) {
                if (!is_array($result) || !isset($result[0], $result[1])) {
                    continue;
                }
                $output .= $result[0] . ',"' . $result[1] . '"' . "\n";
            }
        }

        $this->hashes = $output;
        return $output;
    }

    /**
     * Get the stored hash lines in the ssdeep file format
     * @return string
     */
    public function getHashes() {
        return $this->hashes;
    }

    /**
     * Write the header and the stored hashes to a temporary file
     *
     * The file is created under APPLICATION_PATH . 'temp/' when the
     * application defines that constant, otherwise in the system temporary
     * directory.
     *
     * @return string|boolean the temporary file name, or false on failure
     */
    private function writeHashFile() {
        $directory = defined('APPLICATION_PATH')
            ? APPLICATION_PATH . 'temp/'
            : sys_get_temp_dir();

        $temp_file = tempnam($directory, '');
        if ($temp_file === false) {
            $this->tempFileName = '';
            return false;
        }
        $this->tempFileName = $temp_file;

        $output = $this->hashFileHeader . "\n" . $this->getHashes();
        if (file_put_contents($temp_file, $output) === false) {
            // tempnam() already created the file; do not leave it behind.
            $this->removeHashFile();
            return false;
        }

        return $temp_file;
    }

    /**
     * Delete the temporary hash list file, if there is one
     */
    private function removeHashFile() {
        if ($this->tempFileName !== '' && file_exists($this->tempFileName)) {
            unlink($this->tempFileName);
        }
        $this->tempFileName = '';
    }

    /**
     * Get the hash of a file
     * @param string $file_path
     * @return string whatever SsDeepWrapper::hash() returns for the file
     */
    public function getHash($file_path) {
        $ssdeep = new SsDeepWrapper();
        return $ssdeep->hash($file_path);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Runs the ssdeep binary through the shell and parses its output.
 *
 * I highly recommend that you use
 * the PHP PECL extension I wrote instead though. It is better in every way.
 * For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
 *
 * @author Simon Holywell <treffynnon@php.net>
 * @version 7.5.2010 prototype
 */
class SsDeepWrapper {
    /**
     * Location of the ssdeep binary file
     * @var string
     */
    private $binary = 'ssdeep';

    /**
     * The source document to process (used when no path is passed to
     * hash() or checkMatches())
     * @var string
     */
    private $srcFileName = '/tmp/test.txt';

    /**
     * File of hashes to compare against (used when no hash list is passed
     * to checkMatches())
     * @var string
     */
    private $hashListFile = '/tmp/test.txt';

    /**
     * Calculate the hash for a given file path
     *
     * Runs `ssdeep -bc <file>` and extracts the hash from the line that
     * follows the CSV header.
     *
     * @param string $source_file path to hash; when empty the previously set
     *                            source file is used
     * @return string the `blocksize:hash:hash` string
     * @throws SsDeepUnreadableException when $source_file is not readable
     * @throws SsDeepFailureException when the output cannot be parsed
     */
    public function hash($source_file = '') {
        if (!empty($source_file)) {
            $this->setSrcFileName($source_file);
        }

        // The path is quoted so that spaces and shell metacharacters in it
        // are passed to ssdeep literally instead of being interpreted.
        // NOTE(review): escapeshellarg() is locale sensitive for non-ASCII
        // paths - confirm LC_CTYPE is set appropriately where that matters.
        $command = $this->binary . ' -bc ' . escapeshellarg($this->srcFileName);

        // shell_exec() yields null/false when nothing could be run; treat
        // that the same as unparsable output.
        $output = (string) shell_exec($command);

        $pattern = '/^ssdeep,\d\.\d--blocksize:hash:hash,filename\s+([0-9]+:.+),".*"$/';
        if (!preg_match($pattern, $output, $matches)) {
            throw new SsDeepFailureException('ssdeep has failed to create a hash for the supplied file.');
        }

        return $matches[1];
    }

    /**
     * Check for matching files already in the system.
     *
     * Runs `ssdeep -bm <hash list> <file>` and collects every output line of
     * the form `... matches ...:<filename> (<score>)`.
     *
     * @param string $source_file path to compare; when empty the previously
     *                            set source file is used
     * @param string $hash_list path of the hash list file; when empty the
     *                          previously set hash list is used
     * @return array list of array('filename' => string, 'percentage' => string)
     * @throws SsDeepUnreadableException when either file is not readable
     */
    public function checkMatches($source_file = '', $hash_list = '') {
        if (!empty($source_file)) {
            $this->setSrcFileName($source_file);
        }
        if (!empty($hash_list)) {
            $this->setHashListFileName($hash_list);
        }

        $command = $this->binary . ' -bm '
            . escapeshellarg($this->hashListFile) . ' '
            . escapeshellarg($this->srcFileName);

        $output_lines = array();
        // The exit code is captured but, as before, not acted upon.
        exec($command, $output_lines, $exit_code);

        $found = array();
        foreach ($output_lines as $output_line) {
            if (preg_match('/^.*\ matches\ .*:(.*)\ \((\d{1,3})\)$/', $output_line, $matches)) {
                $found[] = array(
                    'filename' => $matches[1],
                    'percentage' => $matches[2]
                );
            }
        }

        return $found;
    }

    /**
     * Which file are we running this operation on?
     * @param string $file
     * @throws SsDeepUnreadableException when $file is empty or not readable
     */
    public function setSrcFileName($file) {
        if (empty($file) || !is_readable($file)) {
            throw new SsDeepUnreadableException('The source file "' . $file . '" does not exist or is not readable.');
        }
        $this->srcFileName = $file;
    }

    /**
     * Set the file with the list of hashes
     * @param string $file
     * @throws SsDeepUnreadableException when $file is empty or not readable
     */
    public function setHashListFileName($file) {
        if (empty($file) || !is_readable($file)) {
            throw new SsDeepUnreadableException('The hash list file "' . $file . '" does not exist or is not readable.');
        }
        $this->hashListFile = $file;
    }
}
/** | |
* Exceptions for this class | |
*/ | |
class SsDeepUnreadableException extends Exception
{
    // Thrown by SsDeepWrapper when a source file or hash list file does not
    // exist or is not readable. Adds nothing to the base Exception.
}
class SsDeepFailureException extends Exception
{
    // Thrown by SsDeepWrapper::hash() when the ssdeep output does not contain
    // a parsable hash. Adds nothing to the base Exception.
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment