mlconnor/convert_apps_js.php

## convert_apps_js.php
<?php

  /**
   * Read apps.js from stdin and write out valid JSON to stdout
   */

  // Slurp the whole of apps.js from stdin.  Nothing else is read, and
  // nothing is echoed before the final JSON, so stdout stays clean.
  $handle = fopen('php://stdin', 'r');
  if ( $handle === false ) {
    die('unable to open stdin');
  }
  $wappalyzerJs = stream_get_contents($handle);
  fclose($handle);
  if ( $wappalyzerJs === false ) {
    die('unable to read stdin');
  }

  // look for the categories: capture the body of "categories = { ... }"
  if ( ! preg_match('/categories\s+=\s+\{([^}]*)\}/msi', $wappalyzerJs, $matches) ) {
    die('unable to find categories');
  }

  // each category entry is written as  id: 'name'
  $cats = $matches[1];
  if ( !preg_match_all("/\s*(\d+)\s*:\s*'([^']*)'/", $cats, $matches) ) {
    die('unable to get individual cats');
  }
  $categories = array();
  foreach ($matches[1] as $index => $key) {
    $categories[$key] = $matches[2][$index];
  }

  // now dig for the apps: capture the object literal assigned to "apps"
  if ( ! preg_match('/apps\s+=\s+(\{.*}\s*);/msi', $wappalyzerJs, $matches) ) {
    die('couldnt find apps');
  }
  $apps = $matches[1];

  // JavaScript regex literals are not valid JSON, so each one that follows a
  // ':' is swapped for a quoted "REGEX_n" placeholder and remembered in
  // $savedRegex.  The swap is done per occurrence (not with a global
  // str_replace) so a literal that happens to be a substring of another
  // literal, e.g. /foo/ inside /foo/i, cannot corrupt it.
  $savedRegex = array();
  $apps = preg_replace_callback(
    "#:\s*(/(?:\\\\/|.)+\/i??)#U",
    function ($m) use (&$savedRegex) {
      $index = count($savedRegex);
      $savedRegex[$index] = $m[1];
      // Run the literal through PCRE once so that an expression PHP cannot
      // compile surfaces as a warning while converting.
      preg_match($m[1], 'testStr');
      // $m[0] is ':' + whitespace + literal; keep everything before the literal.
      $prefix = substr($m[0], 0, strlen($m[0]) - strlen($m[1]));
      return $prefix . '"REGEX_' . $index . '"';
    },
    $apps
  );
  if ( $apps === null || count($savedRegex) === 0 ) {
    die('no regex');
  }

  // let's make it valid json
  $apps = preg_replace('!/\*.*?\*/!s', '', $apps); // remove multiline comments
  $apps = preg_replace("#'#", '"', $apps); // single quotes to doubles
  $apps = preg_replace("#(\s?)([a-zA-Z0-9\\-.]+)\s*:\s*#ms", '\\1"\\2": ', $apps); // add quotes to strings without them
  $apps = preg_replace("#],(\s*})#", "]\\1", $apps); // remove trailing commas in arrays

  $appDefs = json_decode($apps);
  if ( $appDefs === null ) {
    // Without this check a failed conversion would be emitted as "apps": null.
    die('unable to decode apps as json');
  }

  // Put the original regex literals back wherever a placeholder ended up
  // (walk_recursive hands the closure both keys and leaf values).
  $appDefs = walk_recursive($appDefs, function($val) use ($savedRegex) {
    if ( is_string($val) && preg_match('#REGEX_(\d+)#', $val, $found) ) {
      return $savedRegex[$found[1]];
    }
    return $val;
  });

  // The container must exist before properties are assigned to it; assigning
  // to an undefined variable is an Error on PHP 8.
  $finalDefs = new stdClass();
  $finalDefs->categories = $categories;
  $finalDefs->apps = $appDefs;

  $prettyJson = pretty_json(json_encode($finalDefs));

  // i personally don't like that pretty_json puts all the damn categories
  // on different lines.  it makes the file way too long and less readable.  here's a fix.
  // NOTE(review): this pattern is not string-aware, so digits surrounded by
  // whitespace inside a string value would be compacted too - confirm that
  // is acceptable for the data being converted.
  $prettyJson = preg_replace('|\s*(,?[0-9]+,?)\s+|ms', '\\1', $prettyJson);
  if ( ! json_decode($prettyJson) ) {
    die('there was a json decoding error');
  }
  print $prettyJson;

  exit;

/**
* Rebuilds a decoded JSON structure, passing every object property
* name, every array key and every leaf value through $closure and
* using whatever the closure returns in its place.
*
* Objects come back as fresh stdClass instances and arrays as fresh
* arrays; the input structure itself is not modified.
*/
function walk_recursive($obj, $closure) {
  // Leaves (anything that is neither an object nor an array) are
  // handed straight to the closure.
  if ( !is_object($obj) && !is_array($obj) ) {
    return $closure($obj);
  }

  if ( is_object($obj) ) {
    $copy = new stdClass();
    foreach ($obj as $name => $child) {
      $renamed = $closure($name);
      $copy->$renamed = walk_recursive($child, $closure);
    }
    return $copy;
  }

  $mapped = array();
  foreach ($obj as $name => $child) {
    $renamed = $closure($name);
    $mapped[$renamed] = walk_recursive($child, $closure);
  }
  return $mapped;
}


/**
 * Re-indents a compact JSON string: one space of indent per nesting
 * level, a line break after every '{', '[' and ',' and before every
 * '}' and ']' that lies outside a string literal.
 *
 * String contents are copied through untouched.  Escapes are tracked
 * character by character, so a string that ends in an escaped
 * backslash ("b\\") closes correctly instead of being mistaken for an
 * escaped quote.
 *
 * @param string $json compact JSON text, e.g. the output of json_encode()
 * @return string the same JSON with line breaks and indentation added
 */
function pretty_json($json) {

  $result = '';
  $depth = 0;
  $strLen = strlen($json);
  $indentStr = ' ';
  $newLine = "\n";
  $inString = false;
  $escaped = false;

  for ($i = 0; $i < $strLen; $i++) {

    $char = $json[$i];

    // Inside a string literal nothing is structural; just copy and
    // watch for the closing quote.
    if ($inString) {
      $result .= $char;
      if ($escaped) {
        // This character was escaped by the preceding backslash.
        $escaped = false;
      } else if ($char === '\\') {
        $escaped = true;
      } else if ($char === '"') {
        $inString = false;
      }
      continue;
    }

    if ($char === '"') {
      $inString = true;
    } else if ($char === '}' || $char === ']') {
      // End of an element: break the line and step the indent back
      // before writing the closing bracket.
      $depth--;
      $result .= $newLine . str_repeat($indentStr, max(0, $depth));
    }

    $result .= $char;

    // Start of an element or next sibling: break the line and indent,
    // one level deeper after an opening bracket.
    if ($char === ',' || $char === '{' || $char === '[') {
      if ($char !== ',') {
        $depth++;
      }
      $result .= $newLine . str_repeat($indentStr, max(0, $depth));
    }
  }

  return $result;
}

?>
	<?php

	/**
	* Read apps.js from stdin and write out valid JSON to stdout
	*/

	// Slurp the whole of apps.js from stdin.  Nothing else is read, and
	// nothing is echoed before the final JSON, so stdout stays clean.
	$handle = fopen('php://stdin', 'r');
	if ( $handle === false ) {
	  die('unable to open stdin');
	}
	$wappalyzerJs = stream_get_contents($handle);
	fclose($handle);
	if ( $wappalyzerJs === false ) {
	  die('unable to read stdin');
	}

	// look for the categories: capture the body of "categories = { ... }"
	if ( ! preg_match('/categories\s+=\s+\{([^}]*)\}/msi', $wappalyzerJs, $matches) ) {
	  die('unable to find categories');
	}

	// each category entry is written as  id: 'name'
	$cats = $matches[1];
	if ( !preg_match_all("/\s*(\d+)\s*:\s*'([^']*)'/", $cats, $matches) ) {
	  die('unable to get individual cats');
	}
	$categories = array();
	foreach ($matches[1] as $index => $key) {
	  $categories[$key] = $matches[2][$index];
	}

	// now dig for the apps: capture the object literal assigned to "apps"
	if ( ! preg_match('/apps\s+=\s+(\{.*}\s*);/msi', $wappalyzerJs, $matches) ) {
	  die('couldnt find apps');
	}
	$apps = $matches[1];

	// JavaScript regex literals are not valid JSON, so each one that follows a
	// ':' is swapped for a quoted "REGEX_n" placeholder and remembered in
	// $savedRegex.  The swap is done per occurrence (not with a global
	// str_replace) so a literal that happens to be a substring of another
	// literal, e.g. /foo/ inside /foo/i, cannot corrupt it.
	$savedRegex = array();
	$apps = preg_replace_callback(
	  "#:\s*(/(?:\\\\/|.)+\/i??)#U",
	  function ($m) use (&$savedRegex) {
	    $index = count($savedRegex);
	    $savedRegex[$index] = $m[1];
	    // Run the literal through PCRE once so that an expression PHP cannot
	    // compile surfaces as a warning while converting.
	    preg_match($m[1], 'testStr');
	    // $m[0] is ':' + whitespace + literal; keep everything before the literal.
	    $prefix = substr($m[0], 0, strlen($m[0]) - strlen($m[1]));
	    return $prefix . '"REGEX_' . $index . '"';
	  },
	  $apps
	);
	if ( $apps === null || count($savedRegex) === 0 ) {
	  die('no regex');
	}

	// let's make it valid json
	$apps = preg_replace('!/\*.*?\*/!s', '', $apps); // remove multiline comments
	$apps = preg_replace("#'#", '"', $apps); // single quotes to doubles
	$apps = preg_replace("#(\s?)([a-zA-Z0-9\\-.]+)\s*:\s*#ms", '\\1"\\2": ', $apps); // add quotes to strings without them
	$apps = preg_replace("#],(\s*})#", "]\\1", $apps); // remove trailing commas in arrays

	$appDefs = json_decode($apps);
	if ( $appDefs === null ) {
	  // Without this check a failed conversion would be emitted as "apps": null.
	  die('unable to decode apps as json');
	}

	// Put the original regex literals back wherever a placeholder ended up
	// (walk_recursive hands the closure both keys and leaf values).
	$appDefs = walk_recursive($appDefs, function($val) use ($savedRegex) {
	  if ( is_string($val) && preg_match('#REGEX_(\d+)#', $val, $found) ) {
	    return $savedRegex[$found[1]];
	  }
	  return $val;
	});

	// The container must exist before properties are assigned to it; assigning
	// to an undefined variable is an Error on PHP 8.
	$finalDefs = new stdClass();
	$finalDefs->categories = $categories;
	$finalDefs->apps = $appDefs;

	$prettyJson = pretty_json(json_encode($finalDefs));

	// i personally don't like that pretty_json puts all the damn categories
	// on different lines. it makes the file way too long and less readable. here's a fix.
	// NOTE(review): this pattern is not string-aware, so digits surrounded by
	// whitespace inside a string value would be compacted too - confirm that
	// is acceptable for the data being converted.
	$prettyJson = preg_replace('|\s*(,?[0-9]+,?)\s+|ms', '\\1', $prettyJson);
	if ( ! json_decode($prettyJson) ) {
	  die('there was a json decoding error');
	}
	print $prettyJson;

	exit;

	/**
	* Rebuilds a decoded JSON structure, passing every object property
	* name, every array key and every leaf value through $closure and
	* using whatever the closure returns in its place.
	*
	* Objects come back as fresh stdClass instances and arrays as fresh
	* arrays; the input structure itself is not modified.
	*/
	function walk_recursive($obj, $closure) {
	  // Leaves (anything that is neither an object nor an array) are
	  // handed straight to the closure.
	  if ( !is_object($obj) && !is_array($obj) ) {
	    return $closure($obj);
	  }

	  if ( is_object($obj) ) {
	    $copy = new stdClass();
	    foreach ($obj as $name => $child) {
	      $renamed = $closure($name);
	      $copy->$renamed = walk_recursive($child, $closure);
	    }
	    return $copy;
	  }

	  $mapped = array();
	  foreach ($obj as $name => $child) {
	    $renamed = $closure($name);
	    $mapped[$renamed] = walk_recursive($child, $closure);
	  }
	  return $mapped;
	}


	/**
	 * Re-indents a compact JSON string: one space of indent per nesting
	 * level, a line break after every '{', '[' and ',' and before every
	 * '}' and ']' that lies outside a string literal.
	 *
	 * String contents are copied through untouched.  Escapes are tracked
	 * character by character, so a string that ends in an escaped
	 * backslash ("b\\") closes correctly instead of being mistaken for an
	 * escaped quote.
	 *
	 * @param string $json compact JSON text, e.g. the output of json_encode()
	 * @return string the same JSON with line breaks and indentation added
	 */
	function pretty_json($json) {

	  $result = '';
	  $depth = 0;
	  $strLen = strlen($json);
	  $indentStr = ' ';
	  $newLine = "\n";
	  $inString = false;
	  $escaped = false;

	  for ($i = 0; $i < $strLen; $i++) {

	    $char = $json[$i];

	    // Inside a string literal nothing is structural; just copy and
	    // watch for the closing quote.
	    if ($inString) {
	      $result .= $char;
	      if ($escaped) {
	        // This character was escaped by the preceding backslash.
	        $escaped = false;
	      } else if ($char === '\\') {
	        $escaped = true;
	      } else if ($char === '"') {
	        $inString = false;
	      }
	      continue;
	    }

	    if ($char === '"') {
	      $inString = true;
	    } else if ($char === '}' || $char === ']') {
	      // End of an element: break the line and step the indent back
	      // before writing the closing bracket.
	      $depth--;
	      $result .= $newLine . str_repeat($indentStr, max(0, $depth));
	    }

	    $result .= $char;

	    // Start of an element or next sibling: break the line and indent,
	    // one level deeper after an opening bracket.
	    if ($char === ',' || $char === '{' || $char === '[') {
	      if ($char !== ',') {
	        $depth++;
	      }
	      $result .= $newLine . str_repeat($indentStr, max(0, $depth));
	    }
	  }

	  return $result;
	}

	?>