Skip to content

Instantly share code, notes, and snippets.

@mlconnor
Created September 19, 2012 12:34
Show Gist options
  • Save mlconnor/3749400 to your computer and use it in GitHub Desktop.
Save mlconnor/3749400 to your computer and use it in GitHub Desktop.
Convert apps.js from Wappalyzer into valid JSON
<?php
/**
* Read apps.js from stdin and write out valid JSON to stdout
*/
// ---------------------------------------------------------------------------
// Main script: read Wappalyzer's apps.js from stdin, extract the `categories`
// and `apps` object literals, massage the `apps` literal into strict JSON,
// and print one combined, pretty-printed JSON document to stdout.
// ---------------------------------------------------------------------------

// Slurp all of stdin. (The original also called readfile('php://input'),
// which echoes its input to stdout and returns a byte count that was
// immediately overwritten; that call has been removed.)
$wappalyzerJs = file_get_contents('php://stdin');
if ( $wappalyzerJs === false ) {
    die('unable to read stdin');
}

// Locate the body of the `categories = { ... }` literal.
if ( ! preg_match('/categories\s+=\s+\{([^}]*)\}/msi', $wappalyzerJs, $matches) ) {
    die('unable to find categories');
}
$cats = $matches[1];

// Each category entry looks like:  <number>: '<name>'
if ( ! preg_match_all("/\s*(\d+)\s*:\s*'([^']*)'/", $cats, $matches) ) {
    die('unable to get individual cats');
}
$categories = array();
foreach ($matches[1] as $index => $key) {
    $categories[$key] = $matches[2][$index];
}

// Now dig for the `apps = { ... };` literal.
if ( ! preg_match('/apps\s+=\s+(\{.*}\s*);/msi', $wappalyzerJs, $matches) ) {
    die('couldnt find apps');
}
$apps = $matches[1];

// JavaScript regex literals are not valid JSON. Find every /.../ or /.../i
// literal that follows a colon so it can be swapped for a string placeholder.
if ( ! preg_match_all("#:\s*(/(?:\\\\/|.)+\/i??)#U", $apps, $matches) ) {
    die('no regex');
}
$savedRegex = array();
foreach ($matches[1] as $index => $regMatch) {
    // Replace the literal with "REGEX_<n>"; the original text is restored
    // after the JSON has been decoded.
    $apps = str_replace($regMatch, '"REGEX_' . $index . '"', $apps);
    $savedRegex[$index] = $regMatch;
    // The result is ignored. Presumably a sanity probe so that PHP warns
    // about a pattern it cannot compile -- TODO confirm it is still wanted.
    preg_match($regMatch, 'testStr');
}

// Let's make it valid JSON.
$apps = preg_replace('!/\*.*?\*/!s', '', $apps); // remove multiline comments
$apps = preg_replace("#'#", '"', $apps); // single quotes to doubles
$apps = preg_replace("#(\s?)([a-zA-Z0-9\\-.]+)\s*:\s*#ms", '\\1"\\2": ', $apps); // quote bare object keys
$apps = preg_replace("#],(\s*})#", "]\\1", $apps); // drop the comma left after an array that is the last member of an object

$appDefs = json_decode($apps);
if ( $appDefs === null ) {
    // Without this check a failed decode would silently yield "apps": null.
    die('unable to decode apps as json');
}

// Put the original regex text back wherever a placeholder appears.
// Non-string keys/values (array indexes, numbers, ...) pass through untouched.
$appDefs = walk_recursive($appDefs, function($val) use ($savedRegex) {
    if ( is_string($val) && preg_match('#REGEX_(\d+)#', $val, $matches) ) {
        return $savedRegex[$matches[1]];
    }
    return $val;
});

// The container has to exist before properties are assigned to it:
// assigning a property on an undefined variable throws an Error on PHP 8.
$finalDefs = new stdClass();
$finalDefs->categories = $categories;
$finalDefs->apps = $appDefs;
$prettyJson = pretty_json(json_encode($finalDefs));

// pretty_json puts every bare number on its own line, which makes the
// file far too long and less readable. Collapse the whitespace around them.
$prettyJson = preg_replace('|\s*(,?[0-9]+,?)\s+|ms', '\\1', $prettyJson);

// Final sanity check: the text we are about to print must still decode.
if ( ! json_decode($prettyJson) ) {
    die('there was a json decoding error');
}
print $prettyJson;
exit;
/**
 * Rebuilds a decoded JSON structure, running every key and every
 * non-container value through the given callback.
 *
 * Objects come back as a fresh stdClass and arrays as a fresh array;
 * in both cases each key is replaced by $closure(key) and each value
 * is processed recursively. Anything that is neither an object nor an
 * array is handed to the callback and its result is returned as-is.
 *
 * @param mixed    $obj     decoded JSON value (object, array or scalar/null)
 * @param callable $closure called with a single key or leaf value
 * @return mixed the transformed copy
 */
function walk_recursive($obj, $closure) {
    // Leaves: let the callback decide what they become.
    if ( !is_object($obj) && !is_array($obj) ) {
        return $closure($obj);
    }

    if ( is_array($obj) ) {
        $mapped = array();
        foreach ($obj as $k => $child) {
            $mappedKey = $closure($k);
            $mapped[$mappedKey] = walk_recursive($child, $closure);
        }
        return $mapped;
    }

    // Only objects remain at this point.
    $copy = new stdClass();
    foreach ($obj as $name => $child) {
        $renamed = $closure($name);
        $transformed = walk_recursive($child, $closure);
        $copy->$renamed = $transformed;
    }
    return $copy;
}
/**
 * Re-indents a compact JSON string so it is easier to read.
 *
 * A newline plus indentation is emitted after every '{', '[' and ','
 * and before every '}' and ']' that occurs outside a string literal.
 * Characters inside string literals are copied through untouched.
 *
 * Escapes inside strings are tracked with an explicit flag rather than
 * by peeking at the previous character, so a string that ends in an
 * escaped backslash (e.g. "b\\") is closed correctly.
 *
 * @param string $json compact JSON text, e.g. the output of json_encode()
 * @return string the same JSON with newlines and indentation added
 */
function pretty_json($json) {
    $result = '';
    $pos = 0;                 // current nesting depth
    $strLen = strlen($json);
    $indentStr = ' ';
    $newLine = "\n";
    $outOfQuotes = true;
    $escaped = false;         // true when the previous in-string char was an unescaped backslash

    for ($i = 0; $i < $strLen; $i++) {
        $char = $json[$i];

        if (!$outOfQuotes) {
            // Inside a string literal: copy verbatim and watch for its end.
            $result .= $char;
            if ($escaped) {
                // This character is consumed by the preceding backslash.
                $escaped = false;
            } else if ($char === '\\') {
                $escaped = true;
            } else if ($char === '"') {
                $outOfQuotes = true;
            }
            continue;
        }

        if ($char === '"') {
            // Opening quote of a string literal.
            $outOfQuotes = false;
            $result .= $char;
            continue;
        }

        // End of an element: new line, one level shallower.
        if ($char === '}' || $char === ']') {
            $pos--;
            // max() keeps malformed input (more closers than openers) from
            // producing a negative repeat count.
            $result .= $newLine . str_repeat($indentStr, max($pos, 0));
        }

        $result .= $char;

        // Start of an element or next sibling: new line at the proper depth.
        if ($char === ',' || $char === '{' || $char === '[') {
            if ($char !== ',') {
                $pos++;
            }
            $result .= $newLine . str_repeat($indentStr, max($pos, 0));
        }
    }
    return $result;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment