Last active
April 23, 2024 20:16
-
-
Save inventor96/927239a939c9c3d14447446a86a2f7b1 to your computer and use it in GitHub Desktop.
Recursively finds and fixes unquoted array keys in PHP. This is not foolproof and does not cover 100% of occurrences, but it catches nearly all of them. See the respective sources for discussion and limitations of each function.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php
<?php
/**
 * Run this script on the command line like `php fix_unquoted_array_keys.php './*.php'`,
 * or make the file executable and run it directly.
 *
 * Every argument after the script name is passed directly to `rglob()`, so it's
 * recommended that you quote each pattern to prevent the shell from doing the
 * expansion itself.
 */
/**
 * Gets an array of PHP blocks from a string.
 *
 * The content is split with the tokenizer instead of a regular expression, so a
 * `?>` that appears inside a string literal does not end a block, and a final
 * block that is never closed (the usual case for PHP-only files) is still returned.
 *
 * Which opening tags are recognised is decided by the tokenizer (`<?php`, `<?=`,
 * and `<?` when `short_open_tag` is enabled).
 *
 * @param string $content The code to check.
 * @return array The array of PHP code blocks, in document order. Each block runs
 *               from its opening tag up to and including its `?>`, or to the end
 *               of the content when the tag is never closed.
 */
function get_php_blocks($content) {
    $blocks = [];

    // text of the block currently being collected; null while outside of PHP code
    $current = null;

    // `@` is kept from the original implementation to silence any lexer warnings
    // raised for malformed input; the tokens are still returned in that case
    foreach (@token_get_all($content) as $token) {
        // tokens are either [id, text, line] arrays or single-character strings
        if (is_array($token)) {
            $id = $token[0];
            $text = $token[1];
        } else {
            $id = null;
            $text = $token;
        }

        if ($current === null) {
            // outside of a block: only an opening tag is interesting
            if ($id === T_OPEN_TAG || $id === T_OPEN_TAG_WITH_ECHO) {
                $current = $text;
            }
            continue;
        }

        if ($id === T_CLOSE_TAG) {
            // the close-tag token swallows one trailing line break; leave that
            // line break outside of the block so the block ends exactly at `?>`
            $blocks[] = $current.rtrim($text, "\r\n");
            $current = null;
            continue;
        }

        $current .= $text;
    }

    // a block that is still open at the end of the content runs to the end
    if ($current !== null) {
        $blocks[] = $current;
    }

    return $blocks;
}
/**
 * Does the actual work of fixing the unquoted keys.
 *
 * Every PHP block of the content is processed in three passes:
 *  1. `$var[key]` becomes `$var['key']`,
 *  2. any following `[key]` index levels are quoted the same way,
 *  3. inside double-quoted strings, the now-quoted references are wrapped in
 *     `{}` (`"$a['b']"` is not valid PHP, `"{$a['b']}"` is).
 *
 * @param string $content The content of the file to fix.
 * @param int &$count Set to the total number of key replacements made across all
 *                    blocks (passes 1 and 2).
 * @return string The updated content.
 * @throws RuntimeException When PCRE fails on a block, so that a failed
 *                          replacement can never blank out part of the file.
 *
 * @see https://stackoverflow.com/a/36753232
 * @see https://www.php.net/manual/en/language.variables.basics.php
 * @see https://stackoverflow.com/a/75590499/3404349
 */
function fix_unquoted_array_keys($content, &$count = 0) {
    // start from zero and accumulate, so the caller gets the total for the
    // whole content and not just the count of the last block
    $count = 0;

    foreach (get_php_blocks($content) as $orig_block) {
        // do replacements for the first index level
        $block = preg_replace('/^!|\$([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\[([a-zA-Z_][a-z0-9_]+)\]/','\$$1[\'$2\']', $orig_block, -1, $c1);
        if ($block === null) {
            throw new RuntimeException('PCRE error '.preg_last_error().' while quoting first-level array keys');
        }

        // do replacements for subsequent index levels
        $block = preg_replace('/\]\[([a-zA-Z_][a-z0-9_]+)\]/', '][\'$1\']', $block, -1, $c2);
        if ($block === null) {
            throw new RuntimeException('PCRE error '.preg_last_error().' while quoting nested array keys');
        }

        $block_count = $c1 + $c2;
        if ($block_count === 0) {
            // nothing was changed in this block
            continue;
        }

        // find double-quoted strings; comments and single-quoted strings are
        // matched first and skipped so their content is left alone
        $block = preg_replace_callback('/\/\*.*?\*\/(*SKIP)^|\/\/.*?$(*SKIP)^|\'(?>\\\\?.)*?\'(*SKIP)^|"(?>\\\\?.)*?"/ms', function ($matches) {
            // add brackets to array key references in the string
            return preg_replace('/(?<![{\\\\])(\$\w+(?:\[\'\w+\'\]){1,})/', '{$1}', $matches[0]);
        }, $block);
        if ($block === null) {
            throw new RuntimeException('PCRE error '.preg_last_error().' while wrapping array references in strings');
        }

        // update it
        $content = str_replace($orig_block, $block, $content);
        $count += $block_count;
    }

    return $content;
}
/**
 * The "manager" of updating and checking the file.
 *
 * Reads the file, fixes its unquoted array keys, writes the result back when it
 * changed, and finally lints the file with `php -l`.
 *
 * @param string $filename The path to the file to fix
 * @param int &$count How many occurrences were updated
 * @return string The status: "OK" on success, otherwise a message describing
 *                what went wrong (including the first line of the `php -l`
 *                output when the file has a syntax error).
 */
function fix_file($filename, &$count = 0) {
    // get file
    if (!is_file($filename)) {
        return "Not a valid file!";
    }
    $original = file_get_contents($filename);
    if ($original === false) {
        // bail out; writing back a failed read would truncate the file
        return "Could not read file!";
    }

    // do the work
    try {
        $content = fix_unquoted_array_keys($original, $count);
    } catch (Throwable $e) {
        return $e->getMessage();
    }

    // put it back, but leave untouched files alone
    if ($content !== $original && file_put_contents($filename, $content) === false) {
        return "Could not write file!";
    }

    // check the file just in case we broke something; the name is escaped so
    // that spaces and shell metacharacters in paths are passed through safely
    $syntax = [];
    @exec("php -l ".escapeshellarg($filename)." 2>&1", $syntax);
    if (!empty($syntax[0]) && strpos($syntax[0], "No syntax errors") === false) {
        return $syntax[0];
    }

    return "OK";
}
/**
 * Calls `glob()` recursively and returns all files and folders matching `$pattern`.
 *
 * The pattern is applied in its own directory first, and then its base name is
 * applied again in every sub-directory, however deep.
 *
 * @param string $pattern The glob pattern to search for.
 * @param int $flags Flags supported by `glob()` (except for GLOB_BRACE).
 * @return array The list of files and folders; empty when nothing matches.
 *
 * @see https://stackoverflow.com/a/17161106
 */
function rglob($pattern, $flags = 0) {
    // glob() can return false on error; treat that the same as "no matches"
    $found = [glob($pattern, $flags) ?: []];

    $dirs = glob(dirname($pattern).'/*', GLOB_ONLYDIR | GLOB_NOSORT) ?: [];
    foreach ($dirs as $dir) {
        $found[] = rglob($dir . "/" . basename($pattern), $flags);
    }

    // merge once at the end instead of re-copying the list for every directory
    return array_merge(...$found);
}
// skip this file
array_shift($argv);

// recursively go through files from input
$files = $changed = $fixes = 0;
$failed = false;
foreach ($argv as $param) {
    foreach (rglob($param) ?: [] as $file) {
        if (is_dir($file)) {
            continue;
        }

        // trackers
        ++$files;
        $total = 0;

        // do the fix
        $status = fix_file($file, $total);
        echo "{$file}\t{$status} ({$total})".PHP_EOL;

        // more trackers
        if ($total !== 0) {
            ++$changed;
        }
        $fixes += $total;

        // exit early if there's an error
        if ($status !== 'OK') {
            $failed = true;
            break 2;
        }
    }
}

echo "\nUpdated {$fixes} occurrences in {$changed} files (out of {$files} files scanned).\n";

// let the calling shell see that the run was aborted
if ($failed) {
    exit(1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment