Last active
March 31, 2017 07:04
-
-
Save christianp/3a6badbc59100f6434b5df5f3c231191 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Rewrite every `log_<base>(<argument>)` call in $str as `lg(<argument>, <base>)`.
 *
 * For each `log_<base>(` found, the matching closing bracket is located by
 * counting brackets, and the function recurses into the argument so nested
 * logs are rewritten as well.
 *
 * Limitations: string literals are not recognised (brackets and logs inside
 * quotes are treated like any other text) and `log10(` is left alone.
 * Scanning stops at the first call whose closing bracket cannot be found.
 *
 * @param string $str expression to rewrite
 * @return string the expression with the log calls it could match rewritten
 */
function find_logs_without_strings($str) {
    // The length is taken once: "log_B(A)" and "lg(A, B)" have the same number
    // of bytes, so a replacement never changes the length of $str.
    $length = strlen($str);
    $cursor = 0;

    while ($cursor < $length) {
        if (!preg_match("/log_([\S]+?)\(/", $str, $hit, PREG_OFFSET_CAPTURE, $cursor)) {
            // No further log calls.
            break;
        }

        list($head, $headPos) = $hit[0];
        $base = $hit[1][0];
        // First byte after the opening bracket.
        $argStart = $headPos + strlen($head);

        // Walk forward, counting brackets, until the one that closes this call.
        $nesting = 1;
        for ($closeAt = $argStart; $closeAt < $length; $closeAt++) {
            $char = $str[$closeAt];
            if ($char === '(') {
                $nesting++;
            } elseif ($char === ')' && --$nesting === 0) {
                break;
            }
        }

        if ($nesting > 0) {
            // Unbalanced brackets: leave the rest of the string as it is.
            break;
        }

        $inner = find_logs_without_strings(substr($str, $argStart, $closeAt - $argStart));
        $replacement = 'lg(' . $inner . ', ' . $base . ')';
        $str = substr($str, 0, $headPos) . $replacement . substr($str, $closeAt + 1);

        // Carry on after the call that was just rewritten.
        $cursor = $headPos + strlen($replacement);
    }

    return $str;
}
// This is from stack_utils
/**
 * Extract the first stretch of $string that starts with $left and runs to the
 * matching $right, searching from byte offset $start.
 *
 * When $left and $right compare equal, the stretch ends at the next occurrence
 * of the delimiter. Otherwise the string is scanned one character at a time
 * from the character after the start of $left, counting $left as an opener
 * and $right as a closer, until the count returns to zero.
 *
 * @param string $string text to search
 * @param string $left   opening delimiter
 * @param string $right  closing delimiter
 * @param int    $start  offset at which to begin looking for $left
 * @return array array(substring including both delimiters, start index, end index);
 *               array('', -1, 0) if $left is not found,
 *               array('', start, -1) if identical delimiters are not closed,
 *               array('', -1, -1) if nested delimiters are not closed.
 */
function substring_between($string, $left, $right, $start = 0) {
    $begin = strpos($string, $left, $start);
    if ($begin === false) {
        return ['', -1, 0];
    }

    if ($left == $right) {
        // Identical delimiters cannot nest, so the next one is the end.
        $close = strpos($string, $right, $begin + 1);
        if ($close === false) {
            return ['', $begin, -1];
        }
        return [substr($string, $begin, $close + 1 - $begin), $begin, $close];
    }

    // Distinct delimiters: track how many are currently open.
    $total = strlen($string);
    $open = 1;
    for ($cursor = $begin + 1; $open > 0 && $cursor < $total; $cursor++) {
        $char = $string[$cursor];
        if ($char == $left) {
            $open++;
        } elseif ($char == $right) {
            $open--;
        }
    }

    if ($open > 0) {
        return ['', -1, -1];
    }

    // $cursor now sits one past the closing delimiter.
    return [substr($string, $begin, $cursor - $begin), $begin, $cursor - 1];
}
/**
 * Chris's proposed fix of 30/03/2017, kept for comparison with the other approaches.
 *
 * How it works:
 *  - find every `log_<base>(<something>)` with a regular expression; the
 *    bracket ending the regex match is not necessarily the one that closes
 *    the log call;
 *  - use substring_between() to locate `log_<base>` in the command;
 *  - use substring_between() again, from just after that point, to get the
 *    bracketed argument up to its matching closing bracket;
 *  - globally replace every occurrence of exactly that call with the `lg` form.
 *
 * Known limitations (from the original notes): it does not cope with nested
 * logs, and it does not cope with strings.
 *
 * @param string $cmd expression to rewrite
 * @return string the expression after the replacements
 */
function chris_replace_logs($cmd) {
    if (!preg_match_all("/log_([\S]+?)\(([\S]+?)\)/", $cmd, $found)) {
        return $cmd;
    }

    // Only the captured bases are needed; positions are recomputed against the
    // current (possibly already modified) command on each pass.
    foreach ($found[1] as $base) {
        $name = 'log_' . $base;
        list(, $namePos) = substring_between($cmd, $name, ')');
        list($bracketed) = substring_between($cmd, '(', ')', $namePos + strlen($base));

        $call = $name . $bracketed;
        // Strip the outer brackets from the argument before rebuilding the call.
        $replacement = 'lg(' . substr($bracketed, 1, -1) . ', ' . $base . ')';
        $cmd = str_replace($call, $replacement, $cmd);
    }

    return $cmd;
}
/**
 * Split a string at the delimiters that matter for log replacement: log
 * function names, brackets, and string delimiters (double quotes not preceded
 * by an escaping backslash).
 *
 * The result always starts with a piece of plain text (possibly empty), and
 * each delimiter is followed by the plain text up to the next one. The entries
 * contributed by a delimiter depend on which alternative matched:
 *  - `log_<base>(` contributes two entries, "log_" and the base; the opening
 *    bracket is consumed by the pattern and appears nowhere in the result;
 *  - `log10(`, `(`, `)` and `"` contribute the delimiter itself, preceded by
 *    two empty strings standing in for the unused "log_" and base groups
 *    (PREG_SPLIT_NO_EMPTY is not set).
 *
 * @param string $str expression to split
 * @return array|false list of text and delimiter pieces, or false if preg_split fails
 */
function log_tokenise($str) {
    $m = preg_split("/(log_)([\S]+?)\(|(log10\(|[()]|(?<!\\\\)\")/", $str, 0, PREG_SPLIT_DELIM_CAPTURE);
    return $m;
}

/**
 * Rewrite `log_<base>(x)` as `lg(x, <base>)` and `log10(x)` as `lg(x, 10)`,
 * coping with nested calls and leaving the contents of string literals alone.
 *
 * The token list from log_tokenise() is scanned once:
 *  - a log call is pushed on a stack with the bracket depth it started at;
 *  - a closing bracket that returns to that depth turns the start token into
 *    `lg(` and the bracket into `, <base>)`;
 *  - a string delimiter skips ahead to the matching end delimiter.
 *
 * A log call whose closing bracket never arrives is left exactly as written.
 *
 * @param string $str expression to rewrite
 * @return string the rewritten expression
 */
function replace_logs($str) {
    $tokens = log_tokenise($str);
    if ($tokens === false) {
        // The split failed; better to hand back the input than to lose it.
        return $str;
    }

    $count = count($tokens);
    $depth = 0;
    // Stack of log calls still open: array(token index, depth at the call, base).
    $open = array();

    // Index 0 is plain text; the first possible delimiter is at index 1.
    $i = 1;
    while ($i < $count) {
        switch ($tokens[$i]) {
            case "log10(":
                $open[] = array($i, $depth, 10);
                $depth += 1;
                break;
            case "log_":
                // The base sits in the next token; blank it so that it is not
                // printed twice once the call is rewritten.
                $open[] = array($i, $depth, $tokens[$i + 1]);
                $tokens[$i + 1] = "";
                $depth += 1;
                $i += 1; // step over the base token
                break;
            case "(":
                $depth += 1;
                break;
            case ")":
                $depth -= 1;
                if (!empty($open) && $depth === end($open)[1]) {
                    // Back at the depth where the innermost open log started.
                    list($start, , $base) = array_pop($open);
                    $tokens[$start] = "lg(";
                    $tokens[$i] = ", $base)";
                }
                break;
            case '"':
                // Skip to the closing quote without interpreting anything inside.
                $i += 2;
                while ($i < $count && $tokens[$i] !== '"') {
                    if ($tokens[$i] === 'log_') {
                        // `log_<base>(` produces an extra token and loses the (,
                        // so put the bracket back after the base.
                        $tokens[$i + 1] .= "(";
                        $i += 1;
                    }
                    $i += 2;
                }
                break;
        }
        $i += 2;
    }

    // Calls that were never closed had their base blanked and their opening
    // bracket swallowed by the tokeniser; restore both so the text survives.
    foreach ($open as $entry) {
        list($start, , $base) = $entry;
        if ($tokens[$start] === "log_") {
            $tokens[$start + 1] = $base . "(";
        }
    }

    return implode("", $tokens);
}
// some test cases, and their expected outputs
$cmds = array(
    "log_2(z)" => "lg(z, 2)",
    "log_e(z)" => "lg(z, e)",
    "log10(z)" => "lg(z, 10)",
    "log_2(z+1)" => "lg(z+1, 2)",
    "log_2(log_3(z))" => "lg(lg(z, 3), 2)",
    "log_pi((y+1)/x^(y))" => "lg((y+1)/x^(y), pi)",
    "log_10(x/max(y,x))" => "lg(x/max(y,x), 10)",
    "log_5(1/(if \")\" = \")\" then x else y))" => "lg(1/(if \")\" = \")\" then x else y), 5)",
    "x+log_2(y+log_3(z))" => "x+lg(y+lg(z, 3), 2)",
    "(x+(log_3(y/(log_2(z)+1)))-2)" => "(x+(lg(y/(lg(z, 2)+1), 3))-2)",
    "\"log_2(x)\"" => "\"log_2(x)\""
);

// run each of the functions over the cases, show the results, and count successes
$fns = array('chris_replace_logs','find_logs_without_strings','replace_logs');
// width of the longest test input, used to line up the result column
$max_cmd_len = max(array_map('strlen', array_keys($cmds)));
$results = array();
foreach ($fns as $fn) {
    $successes = 0;
    echo "$fn\n";
    foreach ($cmds as $cmd => $expect) {
        if ($cmd) {
            $result = $fn($cmd);
            // Strict comparison: a loose == would treat numeric-looking strings
            // such as "1e3" and "1000" as equal and report a false pass.
            $pass = $result === $expect;
            $successes += $pass ? 1 : 0;
            echo ($pass ? '✓' : '❌')." ".str_pad($cmd, $max_cmd_len)." : ".$result."\n";
        }
    }
    $results[$fn] = $successes;
    echo "\n";
}

// show the total successes for each function
$max_fn_name = max(array_map('strlen', $fns));
echo count($cmds)." tests\n";
foreach ($fns as $fn) {
    echo str_pad($fn, $max_fn_name).": ".($results[$fn] === count($cmds) ? '✓' : '❌')." ".$results[$fn]."\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment