Skip to content

Instantly share code, notes, and snippets.

@christianp
Last active March 31, 2017 07:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save christianp/3a6badbc59100f6434b5df5f3c231191 to your computer and use it in GitHub Desktop.
Save christianp/3a6badbc59100f6434b5df5f3c231191 to your computer and use it in GitHub Desktop.
<?php
/**
 * Rewrite every `log_<base>(<arg>)` call in $str as `lg(<arg>, <base>)`.
 *
 * For each `log_<base>(` found, the matching closing bracket is located by
 * counting bracket depth, the argument is rewritten recursively (so nested
 * logs are caught), and the whole call is replaced in place.
 *
 * Limitations: brackets inside quoted strings are counted like any other
 * bracket, and `log10(` is not recognised.
 *
 * @param string $str Expression to rewrite.
 * @return string The expression with all balanced `log_<base>(...)` calls rewritten.
 *                If a call's closing bracket is missing, scanning stops and the
 *                remainder of the string is left as it is.
 */
function find_logs_without_strings($str) {
    $offset = 0;
    $len = strlen($str);
    while ($offset < $len) {
        // Look for the next `log_<base>(` at or after the current offset.
        if (!preg_match("/log_([\S]+?)\(/", $str, $m, PREG_OFFSET_CAPTURE, $offset)) {
            break;
        }
        $start = $m[0][1];                  // index of the `l` in `log_`
        $base = $m[1][0];                   // text between `log_` and `(`
        $open = $start + strlen($m[0][0]);  // index of the first character of the argument

        // Walk forward until the bracket opened by this call is closed.
        $depth = 1;
        $close = $open;
        while ($close < $len) {
            $char = $str[$close];
            if ($char === '(') {
                $depth++;
            } else if ($char === ')') {
                $depth--;
                if ($depth === 0) {
                    break;
                }
            }
            $close++;
        }
        if ($depth !== 0) {
            // Unbalanced call: nothing after this point can be rewritten safely.
            break;
        }

        $contents = find_logs_without_strings(substr($str, $open, $close - $open));
        $new = "lg($contents, $base)";
        $str = substr($str, 0, $start) . $new . substr($str, $close + 1);
        // The rewrite happens to keep the length the same, but refresh it anyway
        // so the loop bound never depends on that coincidence.
        $len = strlen($str);
        // Resume just after the rewritten call.
        $offset = $start + strlen($new);
    }
    return $str;
}
// This is from stack_utils
/**
 * Extract the first delimited section of $string, delimiters included.
 *
 * The search begins at the first occurrence of $left at or after $start.
 * When $left and $right are equal, the section ends at the next occurrence
 * of that delimiter. Otherwise the string is scanned one character at a
 * time, counting nested $left/$right pairs, until the opening delimiter is
 * balanced. Because that scan compares single characters, nesting is only
 * tracked for one-character delimiters.
 *
 * @param string $string Text to search.
 * @param string $left   Opening delimiter.
 * @param string $right  Closing delimiter.
 * @param int    $start  Offset at which to begin looking for $left.
 * @return array array(section, start index, index of the section's last character).
 *               array('', -1, 0) if $left is not found;
 *               array('', start index, -1) if identical delimiters have no second occurrence;
 *               array('', -1, -1) if differing delimiters never balance.
 */
function substring_between($string, $left, $right, $start = 0) {
    $begin = strpos($string, $left, $start);
    if ($begin === false) {
        return array('', -1, 0);
    }

    if ($left == $right) {
        // Identical delimiters cannot nest: the very next one closes the section.
        $next = strpos($string, $right, $begin + 1);
        if ($next === false) {
            return array('', $begin, -1);
        }
        $stop = $next + 1;
    } else {
        $total = strlen($string);
        $open = 1;
        // $stop ends up one past the character that balanced the opening delimiter.
        for ($stop = $begin + 1; $open > 0 && $stop < $total; $stop++) {
            $char = $string[$stop];
            if ($char == $left) {
                $open++;
            } else if ($char == $right) {
                $open--;
            }
        }
        if ($open > 0) {
            return array('', -1, -1);
        }
    }

    return array(substr($string, $begin, $stop - $begin), $begin, $stop - 1);
}
/**
 * Chris's proposed fix of 30/03/2017: rewrite `log_<base>(<arg>)` as `lg(<arg>, <base>)`.
 *
 * How it works:
 *  - a regex collects every `log_<base>(<something>)`; the `)` that ends a regex
 *    match is not necessarily the bracket that closes that `log_<base>` call;
 *  - for each base found, substring_between() locates `log_<base>` in the command;
 *  - substring_between() is used again, from that position, to pick out the
 *    bracketed argument with its matching closing bracket;
 *  - a global string replace swaps every occurrence of exactly that call for
 *    the `lg` form.
 *
 * Limitations noted by the author: it does not cope with nested logs, nor with
 * brackets inside quoted strings.
 *
 * @param string $cmd Expression to rewrite.
 * @return string The rewritten expression.
 */
function chris_replace_logs($cmd) {
    if (!preg_match_all("/log_([\S]+?)\(([\S]+?)\)/", $cmd, $found)) {
        return $cmd;
    }

    foreach ($found[1] as $base) {
        // Position of this `log_<base>` in the current (possibly already edited) command.
        $call = substring_between($cmd, 'log_' . $base, ')');
        // Bracketed argument, brackets included, searched for from that position.
        $arg = substring_between($cmd, '(', ')', $call[1] + strlen($base));
        $bracketed = $arg[0];

        $target = 'log_' . $base . $bracketed;
        $replacement = 'lg(' . substr($bracketed, 1, -1) . ', ' . $base . ')';
        $cmd = str_replace($target, $replacement, $cmd);
    }

    return $cmd;
}
/**
 * Split a string on the tokens that matter for log rewriting.
 *
 * The delimiters are `log_<base>(`, `log10(`, a single bracket, and a double
 * quote that is not preceded by a backslash. Delimiter captures are kept in the
 * result between the pieces of plain text. For `log_<base>(` the captures are
 * `log_` and the base; the `(` lies outside both capture groups, so it does not
 * appear in the result.
 *
 * @param string $str Expression to split.
 * @return array Plain-text pieces interleaved with the captured delimiter tokens.
 */
function log_tokenise($str) {
    $pattern = "/(log_)([\S]+?)\(|(log10\(|[()]|(?<!\\\\)\")/";
    return preg_split($pattern, $str, 0, PREG_SPLIT_DELIM_CAPTURE);
}
/**
 * Rewrite `log_<base>(<arg>)` as `lg(<arg>, <base>)` and `log10(<arg>)` as
 * `lg(<arg>, 10)`, leaving the contents of double-quoted strings alone.
 *
 * The expression is tokenised with log_tokenise() and the delimiter tokens are
 * scanned in order:
 *  - a log call is pushed onto a stack together with the bracket depth at
 *    which it was opened;
 *  - when a `)` brings the depth back to that of the innermost open log, the
 *    log's opening token becomes `lg(` and the `)` becomes `, <base>)`;
 *  - a string delimiter makes the scan skip ahead to the closing delimiter.
 *
 * @param string $str Expression to rewrite.
 * @return string The rewritten expression.
 */
function replace_logs($str) {
    $tokens = log_tokenise($str);
    // Tokens are only overwritten in place below, so the count never changes.
    $total = count($tokens);
    $depth = 0;
    // Stack of open log calls: array(index of opening token, depth when opened, base).
    $open_logs = array();

    for ($pos = 1; $pos < $total; $pos += 2) {
        $token = $tokens[$pos];

        if ($token === "log10(") {
            $open_logs[] = array($pos, $depth, 10);
            $depth++;
        } else if ($token === "log_") {
            // The base is the token right after `log_`; blank it so it is not
            // emitted twice, and step over it.
            $open_logs[] = array($pos, $depth, $tokens[$pos + 1]);
            $tokens[$pos + 1] = "";
            $depth++;
            $pos++;
        } else if ($token === "(") {
            $depth++;
        } else if ($token === ")") {
            $depth--;
            $innermost = end($open_logs);
            if ($innermost !== false && $depth == $innermost[1]) {
                // This bracket closes the innermost open log call.
                array_pop($open_logs);
                $base = $innermost[2];
                $tokens[$innermost[0]] = "lg(";
                $tokens[$pos] = ", $base)";
            }
        } else if ($token === '"') {
            // Inside a string: skip to the closing delimiter without rewriting.
            $pos += 2;
            while ($pos < $total && $tokens[$pos] !== '"') {
                if ($tokens[$pos] === 'log_') {
                    // `log_<base>(` produces an extra token and loses its `(`: put it back.
                    $tokens[$pos + 1] .= "(";
                    $pos++;
                }
                $pos += 2;
            }
        }
    }

    return implode("", $tokens);
}
// Test cases: each key is an input expression, each value is the expected rewrite.
$cmds = array(
    "log_2(z)" => "lg(z, 2)",
    "log_e(z)" => "lg(z, e)",
    "log10(z)" => "lg(z, 10)",
    "log_2(z+1)" => "lg(z+1, 2)",
    "log_2(log_3(z))" => "lg(lg(z, 3), 2)",
    "log_pi((y+1)/x^(y))" => "lg((y+1)/x^(y), pi)",
    "log_10(x/max(y,x))" => "lg(x/max(y,x), 10)",
    "log_5(1/(if \")\" = \")\" then x else y))" => "lg(1/(if \")\" = \")\" then x else y), 5)",
    "x+log_2(y+log_3(z))" => "x+lg(y+lg(z, 3), 2)",
    "(x+(log_3(y/(log_2(z)+1)))-2)" => "(x+(lg(y/(lg(z, 2)+1), 3))-2)",
    "\"log_2(x)\"" => "\"log_2(x)\""
);

// Run each candidate function over every case, print one line per case, and count the passes.
$fns = array('chris_replace_logs','find_logs_without_strings','replace_logs');
// Width of the longest input, used to line up the results column.
$max_cmd_len = max(array_map('strlen', array_keys($cmds)));
$results = array();
foreach ($fns as $fn) {
    $successes = 0;
    echo "$fn\n";
    foreach ($cmds as $cmd => $expect) {
        if ($cmd) {
            $result = $fn($cmd);
            // Strict comparison: a loose `==` would treat e.g. null and "" (or two
            // numeric strings) as equal and report a false pass.
            $pass = ($result === $expect);
            if ($pass) {
                $successes++;
            }
            echo ($pass ? '✓' : '❌')." ".str_pad($cmd, $max_cmd_len)." : ".$result."\n";
        }
    }
    $results[$fn] = $successes;
    echo "\n";
}

// Show the total number of passes for each function.
$max_fn_name = max(array_map('strlen', $fns));
echo count($cmds)." tests\n";
foreach ($fns as $fn) {
    $all_passed = ($results[$fn] === count($cmds));
    echo str_pad($fn, $max_fn_name).": ".($all_passed ? '✓' : '❌')." ".$results[$fn]."\n";
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment