Created
September 19, 2012 12:34
-
-
Save mlconnor/3749400 to your computer and use it in GitHub Desktop.
Convert apps.js from Wappalyzer into valid JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Read apps.js from stdin and write out valid JSON to stdout.
 *
 * The input is scanned for two JavaScript object literals: `categories = {...}`
 * and `apps = {...};`. Regex literals inside the apps object are swapped for
 * quoted placeholders so the rest can be massaged into JSON, decoded, and then
 * have the original regex text restored before re-encoding.
 */

// Slurp the whole of stdin.
$wappalyzerJs = file_get_contents('php://stdin');
if ( $wappalyzerJs === false ) {
    $wappalyzerJs = '';
}

// look for the categories
if ( ! preg_match('/categories\s+=\s+\{([^}]*)\}/msi', $wappalyzerJs, $matches) ) {
    die('unable to find categories');
}

$cats = $matches[1];
if ( !preg_match_all("/\s*(\d+)\s*:\s*'([^']*)'/", $cats, $matches) ) {
    die('unable to get individual cats');
}

// Map each numeric category id to its name.
$categories = array();
foreach ($matches[1] as $index => $key) {
    $categories[$key] = $matches[2][$index];
}

// now dig for the apps
if ( ! preg_match('/apps\s+=\s+(\{.*}\s*);/msi', $wappalyzerJs, $matches) ) {
    die('couldnt find apps');
}
$apps = $matches[1];

// look for each regex literal used as a value inside the apps object
if ( ! preg_match_all("#:\s*(/(?:\\\\/|.)+\/i??)#U", $apps, $matches) ) {
    die('no regex');
}

// Swap every regex literal for a quoted placeholder so the quoting fix-ups
// below cannot mangle it; the original text is restored after decoding.
$savedRegex = array();
foreach ($matches[1] as $index => $regMatch) {
    $apps = str_replace($regMatch, '"REGEX_' . $index . '"', $apps);
    $savedRegex[$index] = $regMatch;
}

// let's make it valid json
$apps = preg_replace('!/\*.*?\*/!s', '', $apps); // remove multiline comments
$apps = preg_replace("#'#", '"', $apps); // single quotes to doubles
$apps = preg_replace("#(\s?)([a-zA-Z0-9\\-.]+)\s*:\s*#ms", '\\1"\\2": ', $apps); // add quotes to strings without them
$apps = preg_replace("#],(\s*})#", "]\\1", $apps); // remove trailing commas in arrays

$appDefs = json_decode($apps);
if ( $appDefs === null ) {
    // Without this check a failed decode would silently produce "apps": null.
    die('unable to decode apps as json');
}

// Put the saved regex text back wherever a placeholder survived decoding.
// walk_recursive() hands the closure keys as well as values, so non-string
// inputs (e.g. integer array keys) are passed through untouched.
$appDefs = walk_recursive($appDefs, function($val) use ($savedRegex) {
    if ( is_string($val) && preg_match('#REGEX_(\d+)#', $val, $found) ) {
        return $savedRegex[$found[1]];
    }
    return $val;
});

// Must be created explicitly: assigning a property on an undefined variable
// is a fatal error on PHP 8.
$finalDefs = new stdClass();
$finalDefs->categories = $categories;
$finalDefs->apps = $appDefs;
$prettyJson = pretty_json(json_encode($finalDefs));

// i personally don't like that pretty_json puts all the damn categories
// on different lines. it makes the file way too long and less readable. here's a fix.
$prettyJson = preg_replace('|\s*(,?[0-9]+,?)\s+|ms', '\\1', $prettyJson);

// Sanity check: the final text must still parse as JSON.
if ( ! json_decode($prettyJson) ) {
    die('there was a json decoding error');
}
print $prettyJson;
exit;
/**
 * Rebuild a decoded-JSON structure, passing every key and every leaf value
 * through $closure.
 *
 * Objects come back as a fresh stdClass, arrays as a fresh array; in both
 * cases each key/property name is replaced by $closure(name) and each value
 * is processed recursively. Anything that is neither an object nor an array
 * is treated as a leaf and replaced by $closure(value).
 */
function walk_recursive($obj, $closure) {
    // Leaves: hand the value straight to the callback.
    if ( !is_object($obj) && !is_array($obj) ) {
        return $closure($obj);
    }

    if ( is_array($obj) ) {
        $mapped = array();
        foreach ($obj as $k => $item) {
            // Transform the key first, then descend into the value.
            $mappedKey = $closure($k);
            $mapped[$mappedKey] = walk_recursive($item, $closure);
        }
        return $mapped;
    }

    // Remaining case: an object, iterated over its accessible properties.
    $copy = new stdClass();
    foreach ($obj as $name => $item) {
        $mappedName = $closure($name);
        $copy->$mappedName = walk_recursive($item, $closure);
    }
    return $copy;
}
/**
 * Re-flow a compact JSON string onto multiple indented lines.
 *
 * A newline plus indentation is emitted after every `{`, `[` and `,` and
 * before every `}` and `]`, but only for characters outside string literals.
 * Characters inside string literals are copied through unchanged.
 *
 * Escapes are tracked with an explicit flag rather than by looking at the
 * previous character, so a string that ends in an escaped backslash
 * (e.g. "b\\") is closed correctly instead of swallowing what follows it.
 *
 * @param string $json compact JSON text, e.g. the output of json_encode()
 * @return string the same JSON with newlines and indentation added
 */
function pretty_json($json) {
    $result = '';
    $pos = 0; // current nesting depth
    $strLen = strlen($json);
    $indentStr = ' ';
    $newLine = "\n";
    $outOfQuotes = true;
    $escaped = false; // true when the previous in-string char was an unconsumed backslash

    for ($i = 0; $i < $strLen; $i++) {
        // Grab the next character in the string.
        $char = substr($json, $i, 1);

        // Inside a string literal: copy verbatim and only watch for its end.
        if (!$outOfQuotes) {
            $result .= $char;
            if ($escaped) {
                // This character is consumed by the preceding backslash.
                $escaped = false;
            } else if ($char === '\\') {
                $escaped = true;
            } else if ($char === '"') {
                $outOfQuotes = true;
            }
            continue;
        }

        if ($char === '"') {
            $outOfQuotes = false;
            $result .= $char;
            continue;
        }

        // If this character is the end of an element,
        // output a new line and indent the closing bracket one level less.
        if ($char === '}' || $char === ']') {
            $pos--;
            // max() keeps unbalanced input from passing a negative count.
            $result .= $newLine . str_repeat($indentStr, max($pos, 0));
        }

        // Add the character to the result string.
        $result .= $char;

        // If the character was the beginning of an element (or a separator),
        // output a new line and indent the next line.
        if ($char === ',' || $char === '{' || $char === '[') {
            if ($char !== ',') {
                $pos++;
            }
            $result .= $newLine . str_repeat($indentStr, max($pos, 0));
        }
    }
    return $result;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment