Skip to content

Instantly share code, notes, and snippets.

@treffynnon
Created July 11, 2011 13:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save treffynnon/1075789 to your computer and use it in GitHub Desktop.
Save treffynnon/1075789 to your computer and use it in GitHub Desktop.
A prototype/alpha pure PHP wrapper for the ssdeep binary (tested with version 2.4).
<?php
/**
* Some examples of how to use this prototype. I highly recommend that you use
* the PHP PECL extension I wrote instead though. It is better in every way.
* For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
*/
// The file to hash / compare. It must exist and be readable, otherwise the
// wrapper throws SsDeepUnreadableException. Defined first so that both
// examples below can use it.
$file = '/path/to/your/file.ext';

// Example 1: get a hash via a supplied file path
$SsDeep = new SsDeep();
$hash_string = $SsDeep->getHash($file);

// Example 2: find matches and near matches for a file.
// An array of hashes that you have previously generated and perhaps
// stored in a database as I did. Each entry is array(hash, identifier).
$array_of_hashes = array(
    array(
        '384:fPqFB9Pg3x0bW4N6l7LTLCdLY7q4YDEhrnMm4gQJHSgY9nwE5Bhgi:6FG6bi7HOdc7q4Y4TMmiJHSgYu8j',
        '/file/name/here'
    )
);
$SsDeep = new SsDeep();
$SsDeep->setHashes($array_of_hashes);
// Either an array of array('filename' => ..., 'percentage' => ...) entries,
// or false when the temporary hash list file could not be written.
$matches = $SsDeep->checkMatches($file);
<?php
/**
* I highly recommend that you use
* the PHP PECL extension I wrote instead though. It is better in every way.
* For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
*
* @author Simon Holywell <treffynnon@php.net>
* @version 7.5.2010 prototype
*/
class SsDeep {
    /**
     * ssdeep hash file header format string
     * @var string
     */
    private $hashFileHeader = 'ssdeep,1.0--blocksize:hash:hash,filename';

    /**
     * The temporary hash list file name ('' when no temp file is live)
     * @var string
     */
    private $tempFileName = '';

    /**
     * The contents of the hash file (one 'hash,"name"' line per entry,
     * without the header line)
     * @var string
     */
    private $hashes = '';

    /**
     * Check for matches using ssdeep.
     *
     * Writes the currently set hashes to a temporary hash list file, asks
     * SsDeepWrapper to compare $source_file against it and removes the
     * temporary file again, also when the wrapper throws.
     *
     * @param string $source_file path of the file to compare
     * @return boolean|array false when the hash list file could not be
     *                       written, otherwise whatever
     *                       SsDeepWrapper::checkMatches() returns
     * @throws Exception anything thrown by SsDeepWrapper is re-thrown
     */
    public function checkMatches($source_file) {
        $hash_file = $this->writeHashFile();
        if($hash_file === false) {
            return false;
        }

        try {
            $ssdeep = new SsDeepWrapper();
            $matches = $ssdeep->checkMatches($source_file, $hash_file);
        } catch(Exception $e) {
            // Do not leave the temporary hash list behind on failure.
            $this->removeHashFile();
            throw $e;
        }

        $this->removeHashFile();
        return $matches;
    }

    /**
     * Set the hashes to compare against and get them back in the ssdeep
     * file format (one 'hash,"name"' line per entry).
     *
     * @example $this->setHashes(
     *     array(
     *         array(
     *             '384:fPqFB9Pg3x0bW4N6l7LTLCdLY7q4YDEhrnMm4gQJHSgY9nwE5Bhgi:6FG6bi7HOdc7q4Y4TMmiJHSgYu8j',
     *             '5'
     *         )
     *     ));
     * @param array $details list of array(hash, name) pairs; anything that
     *                       is not an array is treated as an empty list
     * @return string the formatted hash lines (also stored on the object)
     */
    public function setHashes($details) {
        $output = '';
        if(is_array($details)) {
            foreach($details as $result) {
                // Skip malformed rows: a line without both a hash and a
                // name would only corrupt the hash list handed to ssdeep.
                if(!is_array($result) || !isset($result[0], $result[1])) {
                    continue;
                }
                $output .= $result[0] . ',"' . $result[1] . '"' . "\n";
            }
        }
        $this->hashes = $output;
        return $output;
    }

    /**
     * Get the currently set hashes in the ssdeep file format
     * @return string
     */
    public function getHashes() {
        return $this->hashes;
    }

    /**
     * Write the header and the hashes to a temporary file.
     *
     * The file is created in APPLICATION_PATH . 'temp/' when that constant
     * is defined, otherwise in the system temporary directory.
     *
     * @return string|boolean path of the written file, or false on failure
     */
    private function writeHashFile() {
        // APPLICATION_PATH is an application level constant that may not
        // exist when this class is used standalone.
        $temp_dir = defined('APPLICATION_PATH')
            ? APPLICATION_PATH . 'temp/'
            : sys_get_temp_dir();

        $temp_file = tempnam($temp_dir, '');
        if($temp_file === false) {
            $this->tempFileName = '';
            return false;
        }
        $this->tempFileName = $temp_file;

        $output = $this->hashFileHeader . "\n" . $this->getHashes();
        // file_put_contents() returns the byte count, so compare against
        // false explicitly instead of relying on truthiness.
        if(file_put_contents($this->tempFileName, $output) === false) {
            // tempnam() already created the file; clean it up again.
            $this->removeHashFile();
            return false;
        }
        return $this->tempFileName;
    }

    /**
     * Delete the temporary hash list file (if any) and forget its name
     */
    private function removeHashFile() {
        if($this->tempFileName !== '' && file_exists($this->tempFileName)) {
            unlink($this->tempFileName);
        }
        $this->tempFileName = '';
    }

    /**
     * Get the hash of a file
     * @param string $file_path
     * @return string the hash as returned by SsDeepWrapper::hash()
     */
    public function getHash($file_path) {
        $ssdeep = new SsDeepWrapper();
        return $ssdeep->hash($file_path);
    }
}
<?php
/**
* I highly recommend that you use
* the PHP PECL extension I wrote instead though. It is better in every way.
* For more information see http://php.net/ssdeep and http://pecl.php.net/ssdeep
*
* @author Simon Holywell <treffynnon@php.net>
* @version 7.5.2010 prototype
*/
class SsDeepWrapper {
    /**
     * Location of the ssdeep binary file
     * @var string
     */
    private $binary = 'ssdeep';

    /**
     * The source document to process
     * @var string
     */
    private $srcFileName = '/tmp/test.txt';

    /**
     * File of hashes to compare against
     * @var string
     */
    private $hashListFile = '/tmp/test.txt';

    /**
     * Calculate the hash for a given file path.
     *
     * Runs "<binary> -bc <file>" and extracts the "blocksize:hash:hash"
     * part from the line following the ssdeep header.
     *
     * @param string $source_file file to hash; when empty the previously
     *                            set source file is used
     * @return string the ssdeep hash
     * @throws SsDeepUnreadableException when $source_file is not readable
     * @throws SsDeepFailureException when the output cannot be parsed
     */
    public function hash($source_file = '') {
        if(!empty($source_file)) {
            $this->setSrcFileName($source_file);
        }

        // The path is caller supplied: quote it so spaces or shell
        // metacharacters in the name cannot alter the command.
        $command = $this->binary . ' -bc ' . escapeshellarg($this->srcFileName);
        // shell_exec() yields null/false when nothing was produced; cast so
        // preg_match() always receives a string.
        $output = (string) shell_exec($command);

        if(!preg_match('/^ssdeep,\d\.\d--blocksize:hash:hash,filename\s([0-9]+:.+),".*"$/', $output, $matches)) {
            throw new SsDeepFailureException('ssdeep has failed to create a hash for the supplied file.');
        }
        return $matches[1];
    }

    /**
     * Check for matching files already in the system.
     *
     * Runs "<binary> -bm <hash list> <file>" and parses every output line
     * of the form "... matches ...:<name> (<score>)".
     *
     * @param string $source_file file to compare; when empty the previously
     *                            set source file is used
     * @param string $hash_list   hash list file; when empty the previously
     *                            set hash list file is used
     * @return array list of array('filename' => string, 'percentage' => string)
     * @throws SsDeepUnreadableException when either file is not readable
     */
    public function checkMatches($source_file = '', $hash_list = '') {
        if(!empty($source_file)) {
            $this->setSrcFileName($source_file);
        }
        if(!empty($hash_list)) {
            $this->setHashListFileName($hash_list);
        }

        $command = $this->binary
            . ' -bm ' . escapeshellarg($this->hashListFile)
            . ' ' . escapeshellarg($this->srcFileName);

        // exec() appends to an existing array, so start from a clean one.
        $output = array();
        exec($command, $output);

        $results = array();
        foreach($output as $output_line) {
            if(preg_match('/^.*\ matches\ .*:(.*)\ \((\d{1,3})\)$/', $output_line, $matches)) {
                $results[] = array(
                    'filename' => $matches[1],
                    'percentage' => $matches[2]
                );
            }
        }
        return $results;
    }

    /**
     * Which file are we running this operation on?
     * @param string $file
     * @throws SsDeepUnreadableException when $file is empty or not readable
     */
    public function setSrcFileName($file) {
        if(empty($file) || !is_readable($file)) {
            throw new SsDeepUnreadableException('The source file "' . $file . '" does not exist or is not readable.');
        }
        $this->srcFileName = $file;
    }

    /**
     * Set the file with the list of hashes
     * @param string $file
     * @throws SsDeepUnreadableException when $file is empty or not readable
     */
    public function setHashListFileName($file) {
        if(empty($file) || !is_readable($file)) {
            throw new SsDeepUnreadableException('The hash list file "' . $file . '" does not exist or is not readable.');
        }
        $this->hashListFile = $file;
    }
}
/**
* Exceptions thrown by SsDeepWrapper
*/
/**
 * Raised when a source file or hash list file path is empty, missing or
 * not readable.
 */
class SsDeepUnreadableException extends Exception
{
}

/**
 * Raised when the output of the ssdeep binary cannot be parsed into a hash.
 */
class SsDeepFailureException extends Exception
{
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment