Skip to content

Instantly share code, notes, and snippets.

@Danack
Created March 1, 2014 17:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Danack/9293028 to your computer and use it in GitHub Desktop.
Save Danack/9293028 to your computer and use it in GitHub Desktop.
<?php
// A source line containing this phrase is exempt from the unsafe-function check.
define('BYTE_SAFE_PHRASE', 'byte safe');

// Prefix prepended to every directory name below to form the path to scan.
define('PATH_TO_ROOT', '../');

// Author's note: these files are closed with an explicit closing PHP tag so
// that accidental truncation can be detected. Other people tend to leave the
// closing tag off, so only this fixed list of directories is checked.
$directoriesToCheck = array_map(
    function ($directoryName) {
        return PATH_TO_ROOT . $directoryName;
    },
    array(
        'basereality',
        'conf',
        'php_shared',
        'src',
        'templates',
        'tools',
    )
);
// Shared state for the whole run:
//  - errorInfoArray      collects the text of every issue that gets recorded
//  - unsafeFunctionCount counts the unsafe function calls that were found
$GLOBALS['errorInfoArray']      = array();
$GLOBALS['unsafeFunctionCount'] = 0;

// Byte-oriented string function => multi-byte safe replacement to suggest,
// or null when there is no replacement to suggest.
// The key order matters: the keys are joined into a regex alternation.
// NOTE(review): several replacement names (e.g. mb_strcasecmp,
// mb_substr_replace, mb_ucwords) are not provided by PHP's mbstring
// extension; presumably they are project helpers - confirm.
// mbstring functions with no direct non-mb counterpart, and therefore not
// listed: mb_strcut, mb_strimwidth, mb_strwidth, mb_substitute_character.
$GLOBALS['unsafeFunctionsReplaceMap'] = array(
    'mail'           => 'mb_send_mail',
    'split'          => null,               // deprecated function (mb_split exists) - just don't use it
    'stripos'        => 'mb_stripos',
    'stristr'        => 'mb_stristr',
    'strlen'         => 'mb_strlen',
    'strpos'         => 'mb_strpos',
    'strrpos'        => 'mb_strrpos',
    'strrchr'        => 'mb_strrchr',
    'strripos'       => 'mb_strripos',
    'strstr'         => 'mb_strstr',
    'strtolower'     => 'mb_strtolower',
    'strtoupper'     => 'mb_strtoupper',
    'substr_count'   => 'mb_substr_count',
    'substr'         => 'mb_substr',
    'str_ireplace'   => null,
    'str_split'      => 'mb_str_split',
    'strcasecmp'     => 'mb_strcasecmp',
    'strcspn'        => null,               // TODO - implement alternative
    // 'strrev' is not listed yet: TODO - check that mb_strrev works
    'strspn'         => null,               // TODO - implement alternative
    'substr_replace' => 'mb_substr_replace',
    'lcfirst'        => null,
    'ucfirst'        => 'mb_ucfirst',
    'ucwords'        => 'mb_ucwords',
    'wordwrap'       => null,
);
// ===========================================
// Tool entry point
// ===========================================
// Run every check; an exception is reported on the output rather than
// propagated. The script then always terminates with exit status 0, so
// nothing below this block executes as top-level code (the function
// declarations that follow are still available to the call above).
try {
    checkFiles($directoriesToCheck);
} catch (Exception $exception) {
    echo "Boom " . $exception->getMessage();
}

exit(0);
// ===========================================
// End of tool entry point
// ===========================================
/**
 * Scan every given directory and print a summary of the findings.
 *
 * Each directory is walked with scan_directory_recursively(), which invokes
 * checkFile() for every file found. The totals printed afterwards are read
 * from $GLOBALS['errorInfoArray'] and $GLOBALS['unsafeFunctionCount'].
 *
 * @param array $directoriesToCheck Paths of the directories to scan.
 * @return void
 */
function checkFiles($directoriesToCheck){
    foreach ($directoriesToCheck as $directoryToCheck) {
        // scan_directory_recursively() returns false for a path that is
        // missing, not a directory or unreadable. Say so, otherwise a
        // mistyped path silently ends in "No issues were detected".
        if (scan_directory_recursively($directoryToCheck, 'checkFile') === false) {
            echo "Warning - could not scan directory ".$directoryToCheck."\r\n";
        }
    }

    echo "File checking complete\r\n";

    $numberOfIssues = count($GLOBALS['errorInfoArray']);
    $unsafeFunctionCount = $GLOBALS['unsafeFunctionCount'];

    // Every summary line is terminated so that consecutive messages do not
    // run together on the console.
    if ($unsafeFunctionCount > 0) {
        echo "There are ".$unsafeFunctionCount." unsafe functions to refactor.\r\n";
    }
    if ($numberOfIssues > 0) {
        echo "There were $numberOfIssues issues in total.\r\n";
    }
    if ($numberOfIssues == 0 && $unsafeFunctionCount == 0) {
        echo "No issues were detected.\r\n";
    }
}
/**
 * Run the checks that apply to one file, chosen by its extension.
 *
 * Files with an extension of css, js, php, inc, tpl or html get the BOM check
 * (with the start/end byte check switched off) and the non-UTF-8 check.
 * Files with a php or inc extension additionally get the unsafe-function
 * check. Any other file is ignored.
 *
 * @param array $nextFile File description; the 'extension' and 'path'
 *                        entries are read.
 * @return void
 */
function checkFile($nextFile){
    $extension = $nextFile['extension'];

    $scannedExtensions = array('css', 'js', 'php', 'inc', 'tpl', 'html');
    if (!in_array($extension, $scannedExtensions)) {
        return;
    }

    $path = $nextFile['path'];
    checkForBom($path, false);
    checkFileForNonUTF8($path);

    $phpExtensions = array('php', 'inc');
    if (in_array($extension, $phpExtensions)) {
        checkForUnsafeFunctions($path);
    }
}
/**
 * Recursively list a directory, optionally calling a function for every file.
 *
 * Entries that are not readable are skipped. Each returned entry is an array:
 *  - directories: 'path', 'name', 'kind' => 'directory' and 'content' (the
 *    result of scanning that sub-directory, which may itself be false)
 *  - files: 'path', 'name', 'extension', 'size' and 'kind' => 'file'
 *
 * 'extension' is the text after the last dot of the file name, or an empty
 * string when the name contains no dot (so a file literally named "php" is
 * not mistaken for a PHP source file).
 *
 * NOTE: the implicitly nullable "callable $function = null" is kept so the
 * signature is unchanged; PHP 8.4 deprecates that form in favour of
 * "?callable", which needs PHP 7.1 or later.
 *
 * @param string        $directory Directory to scan; one trailing '/' is removed.
 * @param callable|null $function  Called with the description array of every
 *                                 file found, before it is added to the result.
 * @return array|false The list of entries, or false when $directory is not a
 *                     directory, is not readable or cannot be opened.
 */
function scan_directory_recursively($directory, callable $function = null){
    // if the path has a slash at the end we remove it here
    if (mb_substr($directory, -1) == '/') {
        $directory = mb_substr($directory, 0, -1);
    }

    // is_dir() is false for a path that does not exist, so it covers both cases
    if (!is_dir($directory) || !is_readable($directory)) {
        return false;
    }

    // opendir() can still fail on a readable directory (for example when it is
    // removed in the meantime); passing false on to readdir() must be avoided
    $directoryHandle = opendir($directory);
    if ($directoryHandle === false) {
        return false;
    }

    $directoryTree = array();
    while (false !== ($entry = readdir($directoryHandle))) {
        // skip the current and the parent directory
        if ($entry === '.' || $entry === '..') {
            continue;
        }

        $path = $directory.'/'.$entry;
        if (!is_readable($path)) {
            continue;
        }

        if (is_dir($path)) {
            $directoryTree[] = array(
                'path' => $path,
                'name' => $entry,
                'kind' => 'directory',
                // scan the sub-directory by calling this function again
                'content' => scan_directory_recursively($path, $function),
            );
        } elseif (is_file($path)) {
            $nextFile = array(
                'path' => $path,
                'name' => $entry,
                'extension' => pathinfo($entry, PATHINFO_EXTENSION),
                'size' => filesize($path),
                'kind' => 'file',
            );
            if ($function !== null) {
                $function($nextFile);
            }
            $directoryTree[] = $nextFile;
        }
    }
    closedir($directoryHandle);

    return $directoryTree;
}
/**
 * Check a file for a leading UTF-8 byte order mark and, optionally, for the
 * first and last bytes expected of a PHP file.
 *
 * Every problem found is echoed and appended to $GLOBALS['errorInfoArray'],
 * so that it is included in the total of recorded issues.
 *
 * @param string $filePath    Path of the file to inspect.
 * @param bool   $checkForEnd When true, additionally require the first byte
 *                            of the file to be '<' and the last one to be '>'.
 * @return void
 */
function checkForBom($filePath, $checkForEnd = false){
    $handle = fopen($filePath, "r");
    if ($handle === false) {
        // nothing can be read; record it rather than passing false to fread()
        $errorString = "Failed to open file ".$filePath." for checking for bom.\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
        return;
    }

    // A UTF-8 byte order mark is the three byte sequence EF BB BF. Testing
    // only the first byte would also flag files that merely start with
    // another character whose encoding begins with 0xEF.
    $head = fread($handle, 3);
    $byteEnd = false;
    if ($checkForEnd == true) {
        fseek($handle, -1, SEEK_END);
        $byteEnd = fread($handle, 1);
    }
    fclose($handle);

    // first byte of the file, or an empty string for an empty file
    $byteOne = (is_string($head) && $head !== '') ? $head[0] : '';

    if ($head === "\xEF\xBB\xBF") {
        $errorString = "Error - php file ".$filePath." has bom!\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
    }

    if ($checkForEnd == true) {
        if ($byteOne != '<') {
            $errorString = "php file ".$filePath." doesn't start with <, which is weird.\r\n";
            echo $errorString;
            $GLOBALS['errorInfoArray'][] = $errorString;
        }
        if ($byteEnd != '>') {
            $errorString = "php file ".$filePath." ends with $byteEnd instead of >, which is weird.\r\n";
            echo $errorString;
            $GLOBALS['errorInfoArray'][] = $errorString;
        }
    }
}
/**
 * Scan a file, line by line, for bytes that are not part of a well-formed
 * 1 to 4 byte UTF-8 style sequence.
 *
 * Every offending line is echoed and appended to $GLOBALS['errorInfoArray'].
 * If the file cannot be read at all a message is printed and the script
 * exits.
 *
 * @param string $filename Path of the file to scan.
 * @return void
 */
function checkFileForNonUTF8($filename){
    $fileLines = file($filename);
    // file() returns an empty array for an empty file, which is not a
    // failure; only a strict false means the file could not be read.
    if ($fileLines === false) {
        echo "Failed to open file [$filename] for checking for ascii only text aborting.";
        exit(0);
    }

    // The pattern matches well-formed sequences, which the replacement below
    // removes; whatever it cannot match that way is captured in group 2 and
    // kept. An empty result therefore means that the line is clean.
    $regex = '/([\x00-\x7F] |
        [\xC0-\xDF][\x80-\xBF] |
        [\xE0-\xEF][\x80-\xBF]{2} |
        [\xF0-\xF7][\x80-\xBF]{3})* |
        (.*?) |
        ([\x00-\x7F] |
        [\xC0-\xDF][\x80-\xBF] |
        [\xE0-\xEF][\x80-\xBF]{2} |
        [\xF0-\xF7][\x80-\xBF]{3})*
        /xU'; //Ungreedy flag is needed to not crash on long lines

    $line = 1;
    foreach ($fileLines as $fileLine) {
        $result = preg_replace($regex, '$2', $fileLine);
        if ($result === null) {
            // preg_replace() returns null when the regex engine gives up;
            // report the line instead of treating it as clean
            $errorString = "Could not check line $line of file $filename for non-utf8 characters.\r\n";
            echo $errorString;
            $GLOBALS['errorInfoArray'][] = $errorString;
        } else {
            $result = trim($result);
            if (mb_strlen($result) > 0) {
                //$letter is ambiguous with regard to column. Non-utf8 chars confuse the columns in a document
                $letter = mb_strpos($fileLine, $result);
                $errorString = "Non-utf8 character [$result] at line $line of file $filename, letter count = $letter.\r\n";
                echo $errorString;
                $GLOBALS['errorInfoArray'][] = $errorString;
            }
        }
        $line++;
    }
}
/**
 * Check whether a file calls any of the multi-byte unsafe functions listed
 * as keys of $GLOBALS['unsafeFunctionsReplaceMap'].
 *
 * Every call found is echoed, together with the suggested replacement when
 * the map has one, and counted in $GLOBALS['unsafeFunctionCount']. A line
 * that contains BYTE_SAFE_PHRASE (compared case-insensitively) is skipped.
 * If the file cannot be read at all a message is printed and the script
 * exits.
 *
 * @param string $filePath Path of the file to scan.
 * @return void
 */
function checkForUnsafeFunctions($filePath) {
    $fileLines = file($filePath);
    // file() returns an empty array for an empty file, which is not a
    // failure; only a strict false means the file could not be read.
    if ($fileLines === false) {
        echo "Failed to open file [$filePath] for checking for ascii only text aborting.";
        exit(0);
    }

    $functionRegexString = implode('|', array_keys($GLOBALS['unsafeFunctionsReplaceMap']));
    // The negative lookbehind rejects names that are the tail of a longer
    // identifier (mb_strlen) or that follow a quote, without consuming a
    // character. That way a call in the first column of a line and a call
    // nested directly inside another one (strlen(substr(...))) both match.
    // NOTE(review): the optional single \w between the name and the bracket
    // is kept from the original pattern; it also accepts names such as
    // "spliti(" - confirm that this is intended.
    $regex = '/(?<![_\w\'])('.$functionRegexString.')(?:\w)?\(/xu';

    $line = 1;
    foreach ($fileLines as $fileLine) {
        $matches = array();
        $matchCount = preg_match_all($regex, $fileLine, $matches);
        if ($matchCount && mb_stripos($fileLine, BYTE_SAFE_PHRASE) === false) {
            foreach ($matches[1] as $functionFound) {
                echo "Unsafe function [$functionFound] detected on line $line in file $filePath\n";
                $replacement = $GLOBALS['unsafeFunctionsReplaceMap'][$functionFound];
                if ($replacement !== null) {
                    echo "Please replace with $replacement \n";
                } else {
                    echo "There is no direct multi-byte safe replacement for it \n";
                }
                // count every reported call, so that the final summary can
                // not claim that nothing was found
                $GLOBALS['unsafeFunctionCount']++;
            }
        }
        $line++;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment