Skip to content

Instantly share code, notes, and snippets.

@SOF3
Forked from shoghicp/run.php
Created September 25, 2016 16:38
Show Gist options
  • Save SOF3/5a2621d7de7f8aaf2f5bbd389f386ba7 to your computer and use it in GitHub Desktop.
Save SOF3/5a2621d7de7f8aaf2f5bbd389f386ba7 to your computer and use it in GitHub Desktop.
Header Extractor - Usage: php run.php --input libminecraftpe.so --output headers/ --pointer-size 4 --asm
<?php
/**
* Minecraft: Pocket Edition header extractor
*
* This tool needs objdump (and objdump-multiarch + arm variants) installed on the current system
*
*/
// Tool version; shown on the --help screen.
const VERSION = "0.0.1";
// Maps a C++ standard library type name to the header that gets #included
// when that type appears as a parameter of an extracted method.
$baseHeaders = [
"std::string" => "<string>",
"std::shared_ptr" => "<memory>",
"std::unique_ptr" => "<memory>",
"std::vector" => "<vector>",
"std::array" => "<array>",
"std::unordered_map" => "<unordered_map>",
"std::map" => "<map>",
"std::pair" => "<utility>",
];
/**
 * Builds a C++ style declaration fragment from a type description.
 *
 * The pieces are appended in this order: type name, one "&" per reference
 * level, one "*" per pointer level, the optional identifier, and finally the
 * array suffix ("[]" when the array size is "n", "[size]" otherwise).
 *
 * @param array  $typeInfo Reads the "type", "reference", "pointer" and "array" keys.
 * @param string $name     Identifier to place after the type; omitted when empty.
 * @return string
 */
function printType($typeInfo, $name = ""){
	$declaration = $typeInfo["type"];

	$referenceLevels = $typeInfo["reference"];
	if($referenceLevels){
		$declaration .= str_repeat("&", $referenceLevels);
	}

	$pointerLevels = $typeInfo["pointer"];
	if($pointerLevels){
		$declaration .= str_repeat("*", $pointerLevels);
	}

	if($name !== ""){
		$declaration .= " " . $name;
	}

	$dimension = $typeInfo["array"];
	if($dimension){
		if($dimension === "n"){
			$declaration .= "[]";
		}else{
			$declaration .= "[" . $dimension . "]";
		}
	}

	return $declaration;
}
/**
 * Renders raw bytes read from the binary as a C/C++ literal for the given type.
 *
 * 16 and 32 bit integers are decoded as little-endian (unpack "v" / "V");
 * floats and doubles use unpack "f" / "d". When $typeInfo["array"] is truthy a
 * brace-enclosed list is produced; the special array size "n" means "as many
 * elements as fit in $data". Types that are not known here are rendered as a
 * hexadecimal dump of the bytes.
 *
 * @param string $data     Raw bytes of the symbol.
 * @param array  $typeInfo Type description; the "type" and "array" keys are read.
 * @return int|string Integer for scalar integer types, otherwise the literal text.
 */
function printData($data, $typeInfo){
	// Sign-extends the low $bits bits of $value to a native PHP integer. The
	// shift distance is derived from PHP_INT_SIZE so it is correct on both
	// 32 and 64 bit builds (it becomes a no-op for 32 bit values on 32 bit PHP).
	$signExtend = function($value, $bits){
		$shift = PHP_INT_SIZE * 8 - $bits;
		return $value << $shift >> $shift;
	};

	// Each known type selects an element width in bytes, a literal suffix and
	// a reader that turns one element's bytes into a PHP value.
	$suffix = "";
	switch($typeInfo["type"]){
		case "bool":
			// Return the C++ keyword: a PHP bool would concatenate as "1" or "".
			return ord($data) > 0 ? "true" : "false";
		case "long double":
		case "double":
			// Elements are 8 bytes wide (the stride used to be 4).
			$width = 8;
			$suffix = "l";
			$read = function($bytes){
				return unpack("d", $bytes)[1];
			};
			break;
		case "float":
			$width = 4;
			$suffix = "f";
			$read = function($bytes){
				return unpack("f", $bytes)[1];
			};
			break;
		case "unsigned int":
			$width = 4;
			$read = function($bytes){
				return unpack("V", $bytes)[1];
			};
			break;
		case "int":
			$width = 4;
			$read = function($bytes) use ($signExtend){
				return $signExtend(unpack("V", $bytes)[1], 32);
			};
			break;
		case "unsigned short":
			$width = 2;
			$read = function($bytes){
				return unpack("v", $bytes)[1];
			};
			break;
		case "short":
			$width = 2;
			$read = function($bytes) use ($signExtend){
				return $signExtend(unpack("v", $bytes)[1], 16);
			};
			break;
		case "unsigned char":
			$width = 1;
			$read = function($bytes){
				return ord($bytes);
			};
			break;
		case "char":
			// Sign-extend from 8 bits (the old shift of 63 kept only bit 0).
			$width = 1;
			$read = function($bytes) use ($signExtend){
				return $signExtend(ord($bytes), 8);
			};
			break;
		default:
			return "0x" . bin2hex($data);
	}

	if($typeInfo["array"]){
		$count = $typeInfo["array"] === "n" ? (int) (strlen($data) / $width) : (int) $typeInfo["array"];
		$values = [];
		for($i = 0; $i < $count; ++$i){
			$values[] = $read(substr($data, $i * $width, $width));
		}
		// The suffix is part of the separator, so the last element carries none
		// (unchanged from the previous output format).
		return "{ " . implode($suffix . ", ", $values) . "}";
	}

	$value = $read($data);
	return $suffix === "" ? $value : $value . $suffix;
}
/**
 * Loads a struct description file (JSON) as a nested associative array.
 *
 * Tabs are replaced by spaces before decoding. A missing or unreadable file,
 * or content that does not decode, yields an empty array.
 *
 * @param string $path Path of the JSON file.
 * @return array
 */
function parseStruct($path){
	// The file is optional, so a failed open is silenced on purpose.
	$fp = @fopen($path, "r");
	if(!is_resource($fp)){
		return [];
	}
	$json = stream_get_contents($fp);
	// Release the handle right away instead of keeping it open until exit.
	fclose($fp);
	if($json === false){
		return [];
	}
	return (array) json_decode(str_replace("\t", " ", $json), true);
}
/**
 * Converts a C++ qualified name into a relative include path by turning every
 * "::" scope separator into a "/".
 *
 * @param string $str Qualified name.
 * @return string
 */
function fixIncludeName($str){
	return strtr($str, ["::" => "/"]);
}
/**
 * Reduces a C++ type expression to a bare name.
 *
 * Removes, in this order, every "*", "&", "(", ")", space and "const", then
 * cuts the result at the first "<" so template arguments are dropped.
 *
 * @param string $str Type expression.
 * @return string
 */
function fixName($str){
	$bare = $str;
	// Sequential removal: a later token can match text that only became
	// contiguous after an earlier one was stripped.
	foreach(["*", "&", "(", ")", " ", "const"] as $noise){
		$bare = str_replace($noise, "", $bare);
	}
	$templateStart = strpos($bare, "<");
	if($templateStart === false){
		return $bare;
	}
	return substr($bare, 0, $templateStart);
}
/**
 * Runs a shell command and returns everything it wrote to standard output.
 *
 * The output of passthru() is captured with an output buffer rather than
 * being echoed.
 *
 * @param string   $cmd      Command line to execute.
 * @param int|null $exitCode Receives the command's exit status.
 * @return string Captured output.
 */
function runCommand($cmd, &$exitCode = null){
	ob_start();
	passthru($cmd, $exitCode);
	// Fetch the captured output and discard the buffer in one step.
	return ob_get_clean();
}
/**
 * Decodes the hex dump printed by "objdump -s" back into raw bytes.
 *
 * Data rows start with a space, followed by the address and one to four
 * groups of hex digits; the ASCII column that follows is ignored. All other
 * lines (headers, blank lines) are skipped.
 *
 * @param string $data    Complete objdump output.
 * @param string $content Decoded bytes are appended to this string.
 * @param int    $dOffset Receives the address of the first data row while it is still 0.
 */
function parseDataHex($data, &$content, &$dOffset){
	foreach(explode("\n", $data) as $line){
		if($line === "" or $line[0] !== " "){
			continue;
		}
		// Hex groups are separated by exactly one space and the dump is
		// followed by at least two, so the ASCII column cannot be mistaken for
		// a group. Accepting 1-4 groups keeps the last, partially filled row of
		// a section, which a fixed four-group pattern would drop.
		if(preg_match('/^ +([0-9a-f]+)((?: [0-9a-f]+){1,4})(?= |$)/', $line, $matches) !== 1){
			continue;
		}
		$hex = str_replace(" ", "", $matches[2]);
		if(strlen($hex) % 2 !== 0){
			// hex2bin() needs whole bytes; skip anything that is not.
			continue;
		}
		if($dOffset === 0){
			$dOffset = hexdec($matches[1]);
		}
		$content .= hex2bin($hex);
	}
}
/**
 * Extracts a pointer table from the dumped initialized data section
 * (globals $initializedData / $initializedDataOffset).
 *
 * Unless $ignoreNull is set, the table must begin with an all-zero pointer;
 * otherwise null is returned. When no length is given, pointer slots 1..512
 * are scanned and the table ends at the first all-zero slot (an empty string
 * is returned if none is found).
 *
 * @param int      $offset     Address of the table inside the binary.
 * @param int|null $length     Number of bytes to return, or null to detect it.
 * @param bool     $ignoreNull Skip the leading null pointer check.
 * @return string|null
 */
function getInitializedDataTable($offset, $length = null, $ignoreNull = false){
	global $initializedDataOffset, $initializedData;

	$nullPointer = str_repeat("\x00", POINTER_SIZE);
	$start = $offset - $initializedDataOffset;

	if(!$ignoreNull){
		if(substr($initializedData, $start, POINTER_SIZE) !== $nullPointer){
			return null;
		}
	}

	if($length === null){
		$length = 0;
		$slot = 1;
		while($slot <= 512){
			$candidate = substr($initializedData, $start + $slot * POINTER_SIZE, POINTER_SIZE);
			if($candidate === $nullPointer){
				$length = $slot * POINTER_SIZE;
				break;
			}
			++$slot;
		}
	}

	return substr($initializedData, $start, $length);
}
/**
 * Extracts a pointer table from the dumped .rodata section
 * (globals $rodata / $rodataOffset).
 *
 * Unless $ignoreNull is set, the table must begin with an all-zero pointer;
 * otherwise null is returned. When no length is given, pointer slots 1..512
 * are scanned and the table ends at the first all-zero slot (an empty string
 * is returned if none is found).
 *
 * @param int      $offset     Address of the table inside the binary.
 * @param int|null $length     Number of bytes to return, or null to detect it.
 * @param bool     $ignoreNull Skip the leading null pointer check.
 * @return string|null
 */
function getDataTable($offset, $length = null, $ignoreNull = false){
	global $rodataOffset, $rodata;

	$nullPointer = str_repeat("\x00", POINTER_SIZE);
	$start = $offset - $rodataOffset;

	if(!$ignoreNull){
		if(substr($rodata, $start, POINTER_SIZE) !== $nullPointer){
			return null;
		}
	}

	if($length === null){
		$length = 0;
		$slot = 1;
		while($slot <= 512){
			$candidate = substr($rodata, $start + $slot * POINTER_SIZE, POINTER_SIZE);
			if($candidate === $nullPointer){
				$length = $slot * POINTER_SIZE;
				break;
			}
			++$slot;
		}
	}

	return substr($rodata, $start, $length);
}
/**
 * Splits a raw table into POINTER_SIZE byte slots and decodes each slot as a
 * little-endian 32 bit value with the low alignment bits cleared.
 *
 * @param string $str Raw table bytes.
 * @return int[] Decoded values, indexed by slot number.
 */
function parseTypeInfo($str){
	// Clears the bits below the pointer alignment.
	$alignMask = ~(POINTER_SIZE - 1);
	return array_map(function($slot) use ($alignMask){
		return unpack("V", $slot)[1] & $alignMask;
	}, str_split($str, POINTER_SIZE));
}
/**
 * Splits a raw vtable into POINTER_SIZE byte slots and decodes each slot as a
 * little-endian 32 bit value with the low alignment bits cleared.
 *
 * @param string $str Raw vtable bytes.
 * @return int[] Decoded values, indexed by slot number.
 */
function parseVtable($str){
	// Clears the bits below the pointer alignment.
	$alignMask = ~(POINTER_SIZE - 1);
	return array_map(function($slot) use ($alignMask){
		return unpack("V", $slot)[1] & $alignMask;
	}, str_split($str, POINTER_SIZE));
}
// ---- Command line handling ----
$opts = getopt("", ["input:", "output:", "dwarf", "struct", "asm", "help", "pointer-size:"]);
// Missing mandatory options or a non-existent input file fall back to the help screen.
if(!isset($opts["input"]) or !isset($opts["output"]) or !file_exists($opts["input"])){
	$opts["help"] = true;
}
if(isset($opts["help"])){
	echo "Minecraft: Pocket Edition header extractor" . PHP_EOL;
	echo "\tversion ". VERSION . PHP_EOL;
	echo PHP_EOL;
	// Binary, script name and options are separated by spaces so the line can be copied as is.
	echo "Usage: ". PHP_BINARY . " " . $argv[0] . " --input libminecraft.so --output /output/path [options]" . PHP_EOL;
	echo "\t--input => libminecraft.so file path" . PHP_EOL;
	echo "\t--output => output directory for headers" . PHP_EOL;
	echo "\t--pointer-size => sets the pointer size (default 4)" . PHP_EOL;
	echo "\t--struct => use struct generation files" . PHP_EOL;
	echo "\t--dwarf => use DWARF symbols if available" . PHP_EOL;
	echo "\t--asm => include method usage information and assembly" . PHP_EOL;
	echo "\t--help => Show this screen" . PHP_EOL;
	echo PHP_EOL;
	die();
}
ini_set("memory_limit", -1);
define("USE_STRUCTS", isset($opts["struct"]) ? true : false);
define("USE_ASM", isset($opts["asm"]) ? true : false);
define("POINTER_SIZE", isset($opts["pointer-size"]) ? (int) $opts["pointer-size"] : 4);
$path = $opts["input"];
$out = $opts["output"] . "/";
// The path comes from the command line and ends up in shell commands: quote
// it so spaces or shell metacharacters cannot break or alter the command.
$pathArg = escapeshellarg($path);

// ---- Raw data collection through objdump ----
echo "Getting symbol information..." . PHP_EOL;
$result = runCommand("objdump --dynamic-syms --demangle=auto {$pathArg}");
echo "Getting initialized .data.rel.ro information..." . PHP_EOL;
$initializedData = "";
$initializedDataOffset = 0;
parseDataHex(runCommand("objdump -s -j .data.rel.ro {$pathArg}"), $initializedData, $initializedDataOffset);
$initializedDataLookup = [];
$typeinfoLookup = [];
echo "Getting .rodata information..." . PHP_EOL;
$rodata = "";
$rodataOffset = 0;
parseDataHex(runCommand("objdump -s -j .rodata {$pathArg}"), $rodata, $rodataOffset);
$methodLookup = [];
$dwarf = "";
// $asm maps a disassembled symbol name to its instructions (address => text).
$asm = [];
if(USE_ASM){
	echo "Getting ASM information..." . PHP_EOL;
	// $state 0: waiting for an "<address> <symbol>:" header line,
	// $state 1: collecting the instruction lines of $entry.
	$state = 0;
	$entry = "";
	$entries = [];
	foreach(explode("\n", runCommand("objdump --demangle=auto --disassemble-zeroes --source {$pathArg}")) as $line){
		$line = trim($line);
		if($line === ""){
			// A blank line ends the current symbol: store what was collected.
			if(count($entries) > 0){
				$asm[$entry] = $entries;
				$entries = [];
				$entry = "";
			}
			$state = 0;
			continue;
		}
		if($state === 0 and preg_match("/^([0-9a-f]+) <(.*)>:$/", $line, $matches) > 0){
			$state = 1;
			$entry = $matches[2];
		}elseif($state === 1 and preg_match("/^([0-9a-f]+):[ \t]+([0-9a-f]+.*)$/", $line, $matches) > 0){
			$entries[$matches[1]] = trim($matches[2]);
		}
	}
}
// Create the output directory (including missing parents) when it is absent.
// Failure stays non-fatal here; it is reported per header when the file
// cannot be opened for writing.
if(!is_dir($out)){
	@mkdir($out, 0777, true);
}
// $contents groups the extracted symbols per owning class; symbols without an
// owner, std:: symbols and "__" symbols are collected under "common_header".
$contents = [
	"common_header" => []
];
// $indexLookup holds, per class and member, the raw .rodata bytes of the
// symbol or (for .bss symbols) just its size.
$indexLookup = [];
$structInfo = [];
if(USE_STRUCTS){
	$structInfo = parseStruct($out . "struct.json");
}
foreach(explode("\n", $result) as $symbolLine){
	if(preg_match("/([0-9a-f]+)[a-zA-Z \t]+(\\.[a-z\\.]+)[ \t]+([0-9a-f]+)( Base |) (.*)/", trim($symbolLine), $matches) !== 1){
		continue;
	}
	$symbol = trim($matches[5]);
	$section = $matches[2];
	$address = hexdec($matches[1]);
	$byteCount = hexdec($matches[3]);
	// Tables are read from whichever dumped section the symbol lives in.
	$targetCall = $section === ".rodata" ? "getDataTable" : "getInitializedDataTable";

	if(strncmp($symbol, "vtable", 6) === 0){
		$info = $targetCall($address, $byteCount);
		if($info !== null){
			// Offset 11 skips the "vtable for " prefix.
			$initializedDataLookup[fixName(substr($symbol, 11))] = parseVtable($info);
		}
		continue;
	}

	if(strncmp($symbol, "typeinfo name", 13) === 0){
		// The table is fetched and stripped of leading digits, but the result
		// is not stored anywhere yet.
		$info = $targetCall($address, $byteCount, true);
		if($info !== null){
			$info = ltrim($info, "0123456789");
		}
		continue;
	}
	if(strncmp($symbol, "typeinfo", 8) === 0){
		$info = $targetCall($address, $byteCount, true);
	}

	if(strncmp($symbol, "void ", 5) === 0){
		$symbol = substr($symbol, 5);
	}
	if(strncmp($symbol, "std::", 5) === 0 or strncmp($symbol, "__", 2) === 0){
		$contents["common_header"][$symbol] = $symbol;
		continue;
	}
	if(strpos($symbol, "::") === false){
		continue;
	}

	$methodLookup[$address] = $symbol;

	// Split "Owner::member(args)" on the scopes that precede the argument list.
	$parenAt = strpos($symbol, "(");
	$qualified = $parenAt !== false ? substr($symbol, 0, $parenAt) : $symbol;
	$scopes = explode("::", $qualified);
	array_pop($scopes);
	$owner = trim(implode("::", $scopes));
	if($owner === ""){
		$contents["common_header"][$symbol] = $symbol;
		continue;
	}

	// Member text is everything after "Owner::", argument list included.
	$member = substr($symbol, strlen($owner) + 2);
	$owner = fixName($owner);
	if(!isset($contents[$owner])){
		$contents[$owner] = [];
		$indexLookup[$owner] = [];
	}
	$contents[$owner][$member] = $member;

	if($section === ".rodata"){
		$indexLookup[$owner][$member] = substr($rodata, $address - $rodataOffset, $byteCount);
	}elseif($section === ".bss"){
		$indexLookup[$owner][$member] = $byteCount;
	}
}
// Bookkeeping for the header generation pass:
//   $processedFiles - classes whose header has been written
//   $override       - per class, members that also exist in a parent class
//   $headers        - per class, the includes its header needs
//   $parents        - per class, the parent classes found through its vtable
$processedFiles = [];
$override = [];
$headers = [];
$parents = [];
// A class is only generated once every class it depends on (parents, member
// types) has been generated; classes that are not ready are moved to the end
// of $contents and retried in the next pass. The loop ends when everything is
// written or a pass makes no progress.
do{
	$lastCount = 0;
	foreach($contents as $n => $list){
		// Never regenerate a finished class: besides the wasted work, counting
		// it as progress again would keep this loop running forever when the
		// remaining classes depend on each other.
		if(isset($processedFiles[$n])){
			continue;
		}
		$hName = fixIncludeName($n);
		asort($list);
		$override[$n] = [];
		$headers[$n] = [];
		$parents[$n] = [];
		$continue = true;
		// Vtable slots that resolve to a method of another known class mark
		// that class as a parent.
		if(isset($initializedDataLookup[$n])){
			foreach($initializedDataLookup[$n] as $i => $p){
				if(isset($methodLookup[$p])){
					$c = $methodLookup[$p];
					$d = explode("::", $c);
					$second = trim(array_pop($d));
					$first = trim(implode("::", $d));
					if($first !== $n and isset($contents[$first])){
						if(!isset($processedFiles[$first])){
							$continue = false;
							break;
						}
						$parents[$n][$first] = $first;
						$headers[$n][$first] = $first;
						foreach($list as $k){
							if(isset($contents[$first][$k])){
								$override[$n][$k] = true;
							}
						}
					}
				}
			}
		}
		if(!$continue){
			// Parent not generated yet: requeue at the end.
			unset($contents[$n]);
			$contents[$n] = $list;
			continue;
		}
		if(strpos($hName, "/") !== false){
			@mkdir(dirname($out . $hName), 0777, true);
		}
		if(!isset($structInfo[$n])){
			$structInfo[$n] = [];
		}
		if(!isset($structInfo[$n][$n])){
			$classInfo = [
				"__size" => 0
			];
			$structInfo[$n][$n] = $classInfo;
			if(USE_STRUCTS){
				echo "\rNew class $n";
			}
		}else{
			$classInfo = $structInfo[$n][$n];
			if(!isset($classInfo["__size"])){
				$classInfo["__size"] = 0;
			}
		}
		// Sum up the size of the declared fields.
		$totalSize = 0;
		foreach($classInfo as $v => $k){
			if($v === "__size"){
				continue;
			}else{
				if($k["pointer"]){
					$totalSize += POINTER_SIZE;
					continue;
				}
				$size = 0;
				switch($k["type"]){
					case "bool":
					case "char":
					case "unsigned char":
						$size = 1;
						break;
					case "short":
					case "unsigned short":
						$size = 2;
						break;
					case "int":
					case "unsigned int":
					case "float":
						$size = 4;
						break;
					case "long double":
					case "double":
						$size = 8;
						break;
					default:
						if($k["type"] !== $n and isset($contents[$k["type"]]) and !isset($processedFiles[$k["type"]])){
							$continue = false;
							break;
						}elseif(isset($contents[$k["type"]])){
							$headers[$n][$k["type"]] = $k["type"];
							$size = $structInfo[$k["type"]][$k["type"]]["__size"];
						}
				}
				if($k["array"]){
					if($k["array"] === "n"){
						continue;
					}else{
						$totalSize += $size * $k["array"];
					}
				}else{
					$totalSize += $size;
				}
			}
		}
		if(!$continue){
			// A field type is not generated yet: requeue at the end.
			unset($contents[$n]);
			$contents[$n] = $list;
			continue;
		}
		if($classInfo["__size"] !== 0 and $totalSize !== $classInfo["__size"]){
			echo "\rClass $n size differs, old: ".$classInfo["__size"].", new $totalSize" . PHP_EOL;
		}else{
			$classInfo["__size"] = $totalSize;
		}
		//Correct shared parents: drop a parent that is already a parent of another parent
		foreach($parents[$n] as $p){
			foreach($parents[$n] as $p2){
				if(isset($parents[$p2][$p])){
					unset($parents[$n][$p]);
				}
			}
		}
		echo "\rGenerating $hName.h ";
		$fp = fopen($out . $hName . ".h", "w");
		if(!is_resource($fp)){
			echo "\rCouldn't create $hName.h! " . PHP_EOL;
			continue;
		}
		// Collect includes for the top level parameter types of every method.
		// $cnt tracks the template nesting depth so commas inside <...> do not
		// split a parameter.
		foreach($list as $name){
			if(($pos = strpos($name, "(")) !== false){
				$parameter = "";
				$len = strlen($name);
				$cnt = 0;
				for(; $pos < $len; ++$pos){
					$c = $name[$pos];
					if($c === ")" && $cnt === 0){
						$parameter = fixName($parameter);
						if($parameter !== "" and $parameter !== $n){
							if(isset($contents[$parameter])){
								$headers[$n][$parameter] = $parameter;
							}elseif(isset($baseHeaders[$parameter])){
								$headers[$n][$baseHeaders[$parameter]] = $baseHeaders[$parameter];
							}
						}
						break;
					}
					if($c === "<"){
						++$cnt;
					}elseif($c === ">"){
						--$cnt;
					}
					if($c === "," and $cnt === 0){
						$parameter = fixName($parameter);
						if($parameter !== "" and $parameter !== $n){
							if(isset($contents[$parameter])){
								$headers[$n][$parameter] = $parameter;
							}elseif(isset($baseHeaders[$parameter])){
								$headers[$n][$baseHeaders[$parameter]] = $baseHeaders[$parameter];
							}
						}
						$parameter = "";
					}else{
						$parameter .= $c;
					}
				}
			}
		}
		// Sort this class's own include list (sorting the whole $headers map
		// left the includes in insertion order).
		$sortedHeaders = $headers[$n];
		asort($sortedHeaders);
		foreach($sortedHeaders as $h){
			if($h[0] === "<"){
				fwrite($fp, "#include ".$h."\n");
			}else{
				$h = fixIncludeName($h);
				fwrite($fp, "#include \"".$h.".h\"\n");
			}
		}
		fwrite($fp, "\n");
		if($n !== "common_header"){
			if(count($parents[$n]) === 0){
				fwrite($fp, "class $n{\n");
			}else{
				$class = "class $n :";
				foreach($parents[$n] as $p){
					$class .= "\n\tpublic $p,";
				}
				// Replace the trailing comma with the opening brace.
				$class = substr($class, 0, -1) . " {\n";
				fwrite($fp, $class);
			}
			fwrite($fp, "\n\tpublic:\n");
			$visibility = "public";
			foreach($classInfo as $v => $k){
				if($v === "__size"){
					fwrite($fp, "\t//Total size: $k\n");
				}else{
					if($k["visibility"] !== $visibility){
						$visibility = $k["visibility"];
						fwrite($fp, "\n\t$visibility:\n");
					}
					fwrite($fp, "\t\t". printType($k, $v) . ";\n");
				}
			}
			if($visibility !== "public"){
				fwrite($fp, "\n\tpublic:\n");
			}
			fwrite($fp, "\n");
		}
		foreach($list as $name){
			$realName = $name;
			if(!isset($structInfo[$n][$name])){
				$typeInfo = [
					"type" => "void",
					"pointer" => 0,
					"reference" => 0,
					"array" => 0,
					"const" => false,
					"visibility" => "public"
				];
				$structInfo[$n][$name] = $typeInfo;
				if(USE_STRUCTS){
					echo "\rNew method $n::$name\n";
				}
			}else{
				$typeInfo = $structInfo[$n][$name];
			}
			$extra = ";";
			if(($pos = strpos($name, "(")) === false){
				// No argument list: a data symbol, emitted as a static constant.
				$name = "static const " . printType($typeInfo, $realName);
				if(isset($indexLookup[$n][$realName])){
					if(is_int($indexLookup[$n][$realName])){
						$extra .= " //length ". ($len = $indexLookup[$n][$realName]);
					}else{
						$name .= " = " . printData($indexLookup[$n][$realName], $typeInfo);
						$extra .= " //length ". ($len = strlen($indexLookup[$n][$realName]));
					}
					if($typeInfo["type"] === "void" and $classInfo["__size"] > 0 and $len > 0){
						if($len === $classInfo["__size"]){
							$extra .= " (possible type $n, length matches)";
						}elseif($len % $classInfo["__size"] === 0){
							$extra .= " (possible type ".$n."[".($len / $classInfo["__size"])."], length + modulo matches)";
						}
					}
				}
			}else{
				// Method: optionally attach the disassembly as a commented body.
				// The symbol is looked up both without and with a "void " prefix.
				if(USE_ASM){
					foreach([$n."::".$realName, "void " . $n."::".$realName] as $asmKey){
						if(isset($asm[$asmKey])){
							$extra = "{\n";
							foreach($asm[$asmKey] as $h => $c){
								$extra .= "\t\t// @$h: $c\n";
							}
							$extra .= "\t}";
							break;
						}
					}
				}
				// Insert a generated name (var0, var1, ...) after every top
				// level parameter type.
				$first = $newName = substr($name, 0, $pos);
				$len = strlen($name);
				$cnt = 0;
				$start = $pos;
				$var = 0;
				for(; $pos < $len; ++$pos){
					$c = $name[$pos];
					if($c === ")" && $cnt === 0){
						if($pos - $start > 2){
							$newName .= " var$var";
						}
						break;
					}
					if($c === "<"){
						++$cnt;
					}elseif($c === ">"){
						--$cnt;
					}
					if($c === "," and $cnt === 0){
						$newName .= " var$var";
						++$var;
					}
					$newName .= $c;
				}
				if($first !== $n){
					$name = printType($typeInfo, $newName . substr($name, $pos));
				}else{
					// Name equals the class name: emitted without a return type.
					$name = $newName . substr($name, $pos);
				}
				if(isset($override[$n][$realName])){
					$extra = " override$extra";
				}
			}
			fwrite($fp, "\t" . $name . $extra . "\n\n");
		}
		if($n !== "common_header"){
			// A C++ class definition must be terminated by a semicolon.
			fwrite($fp, "};\n");
		}
		// Close the header now instead of relying on the handle being
		// overwritten by the next iteration.
		fclose($fp);
		$structInfo[$n][$n] = $classInfo;
		$processedFiles[$n] = true;
		++$lastCount;
	}
}while(count($processedFiles) < count($contents) and $lastCount > 0);
if(USE_STRUCTS){
	file_put_contents($out . "struct.json", json_encode($structInfo, JSON_PRETTY_PRINT));
}
echo PHP_EOL;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment