Skip to content

Instantly share code, notes, and snippets.

@shoghicp
Last active September 26, 2020 10:33
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save shoghicp/f3453eed1b84a8b93f9a481620161884 to your computer and use it in GitHub Desktop.
Save shoghicp/f3453eed1b84a8b93f9a481620161884 to your computer and use it in GitHub Desktop.
Header Extractor - Usage: php run.php --input libminecraftpe.so --output headers/ --pointer-size 4 --asm
<?php
/**
* Minecraft: Pocket Edition header extractor
*
* This tool needs objdump (and objdump-multiarch + arm variants) installed on the current system
*
*/
// Tool version; printed by the --help screen.
const VERSION = "0.0.1";
// Maps a C++ standard-library type name (as produced by fixName(), i.e. with
// template arguments and qualifiers removed) to the standard header that is
// #included when that type appears as a method parameter.
$baseHeaders = [
"std::string" => "<string>",
"std::shared_ptr" => "<memory>",
"std::unique_ptr" => "<memory>",
"std::vector" => "<vector>",
"std::array" => "<array>",
"std::unordered_map" => "<unordered_map>",
"std::map" => "<map>",
"std::pair" => "<utility>",
];
/**
 * Builds a C++-style declaration fragment from a type description.
 *
 * The pieces are emitted in this order: type name, one "&" per reference
 * level, one "*" per pointer level, the optional identifier (preceded by a
 * space) and finally the array suffix ("[]" for the open-ended marker "n",
 * "[<count>]" otherwise).
 *
 * @param array  $typeInfo Must provide the keys "type", "reference", "pointer" and "array".
 * @param string $name     Identifier to place after the type; omitted when empty.
 *
 * @return string
 */
function printType($typeInfo, $name = ""){
    $declaration = (string) $typeInfo["type"];

    if($typeInfo["reference"]){
        $declaration .= str_repeat("&", $typeInfo["reference"]);
    }
    if($typeInfo["pointer"]){
        $declaration .= str_repeat("*", $typeInfo["pointer"]);
    }
    if($name !== ""){
        $declaration .= " " . $name;
    }
    if($typeInfo["array"]){
        if($typeInfo["array"] === "n"){
            $declaration .= "[]";
        }else{
            $declaration .= "[" . $typeInfo["array"] . "]";
        }
    }

    return $declaration;
}
/**
 * Renders the raw bytes of a static data member as a C++-style literal.
 *
 * Fixes over the previous implementation:
 *  - string offsets use [] instead of the {} syntax that PHP 8 rejects;
 *  - double arrays are read with an 8-byte stride (was 4 bytes);
 *  - "char" values are sign-extended from 8 bits (the old 63-bit shift could
 *    only ever produce 0 or -1);
 *  - every array element carries the literal suffix, not all but the last;
 *  - an explicit array count is clamped to the data actually available, and a
 *    scalar with too little data falls back to the hex dump, instead of
 *    feeding truncated input to unpack().
 *
 * @param string $data     Raw bytes of the symbol.
 * @param array  $typeInfo Type description; the keys "type" and "array" are read.
 *                         "array" is falsy for scalars, "n" for "as many
 *                         elements as the data holds", or an element count.
 *
 * @return bool|int|string bool for "bool", int for scalar integer types, a
 *                         suffixed string for scalar floating point types,
 *                         "{ a, b}" for arrays and "0x<hex>" for anything else.
 */
function printData($data, $typeInfo){
    // type name => [element width in bytes, unpack() format, sign-extend?, literal suffix]
    // "d"/"f" use the machine byte order, exactly as before.
    static $layouts = [
        "long double" => [8, "d", false, "l"],
        "double" => [8, "d", false, "l"],
        "float" => [4, "f", false, "f"],
        "unsigned int" => [4, "V", false, ""],
        "int" => [4, "V", true, ""],
        "unsigned short" => [2, "v", false, ""],
        "short" => [2, "v", true, ""],
        "unsigned char" => [1, "C", false, ""],
        "char" => [1, "C", true, ""],
    ];

    $type = $typeInfo["type"];
    if($type === "bool"){
        return ord($data) > 0;
    }
    if(!is_string($type) or !isset($layouts[$type])){
        return "0x" . bin2hex($data);
    }

    list($width, $format, $signed, $suffix) = $layouts[$type];
    // Number of complete elements present in $data.
    $available = (int) floor(strlen($data) / $width);

    $decodeAt = function($index) use ($data, $width, $format, $signed){
        $value = unpack($format, substr($data, $index * $width, $width))[1];
        if($signed){
            // Move the element's sign bit into PHP's sign bit and shift back
            // arithmetically; works for both 32-bit and 64-bit PHP integers.
            $shift = (PHP_INT_SIZE - $width) * 8;
            $value = $value << $shift >> $shift;
        }
        return $value;
    };

    if($typeInfo["array"]){
        $count = $typeInfo["array"] === "n" ? $available : min((int) $typeInfo["array"], $available);
        $items = [];
        for($i = 0; $i < $count; ++$i){
            $items[] = $decodeAt($i) . $suffix;
        }
        return "{ " . implode(", ", $items) . "}";
    }

    if($available < 1){
        // Not enough bytes for even one element: show what is there.
        return "0x" . bin2hex($data);
    }

    $value = $decodeAt(0);
    return $suffix === "" ? $value : $value . $suffix;
}
/**
 * Loads a struct description file (JSON) as a nested associative array.
 *
 * Tab characters are replaced by spaces before decoding, as before. The file
 * is now read with file_get_contents(), so no stream handle is left open, and
 * anything that does not decode to an array (missing file, invalid JSON, a
 * bare scalar) yields an empty array instead of a one-element cast.
 *
 * @param string $path Path of the JSON file.
 *
 * @return array Decoded structure, or [] when nothing usable could be read.
 */
function parseStruct($path){
    // Best-effort read: a missing struct file simply means "no known structs".
    $raw = @file_get_contents($path);
    if($raw === false){
        return [];
    }

    $decoded = json_decode(str_replace("\t", " ", $raw), true);

    return is_array($decoded) ? $decoded : [];
}
/**
 * Converts a C++ scoped name into a relative include path by turning every
 * "::" separator into "/".
 *
 * @param string $str Scoped name, e.g. "Outer::Inner".
 *
 * @return string
 */
function fixIncludeName($str){
    $segments = explode("::", $str);

    return implode("/", $segments);
}
/**
 * Reduces a C++ type expression to a bare type name.
 *
 * The characters "*", "&", "(", ")" and spaces are removed first, then every
 * occurrence of "const"; the removals run one after another in that order.
 * Whatever follows the first "<" (template arguments) is cut off.
 *
 * @param string $str Type expression, e.g. "std::vector<int> const&".
 *
 * @return string
 */
function fixName($str){
    $bare = $str;
    foreach(["*", "&", "(", ")", " ", "const"] as $noise){
        $bare = str_replace($noise, "", $bare);
    }

    // strstr(..., true) yields the part before the first "<", or false if there is none.
    $beforeTemplate = strstr($bare, "<", true);

    return $beforeTemplate === false ? $bare : $beforeTemplate;
}
/**
 * Executes a shell command and returns everything it wrote to standard output.
 *
 * The command is run through passthru() while an output buffer is active, so
 * binary output is captured unmodified rather than echoed.
 *
 * @param string   $cmd      Command line to execute.
 * @param int|null $exitCode Receives the exit status reported by passthru().
 *
 * @return string Captured output.
 */
function runCommand($cmd, &$exitCode = null){
    ob_start();
    passthru($cmd, $exitCode);

    // Fetch the buffer contents and discard the buffer in one step.
    return ob_get_clean();
}
/**
 * Decodes the hex dump printed by "objdump -s" and appends the bytes to $content.
 *
 * Only lines that start with a space are considered. Such a line is expected
 * to hold an address followed by one to four space-separated hex words; the
 * ASCII column that follows is ignored.
 *
 * Fixes over the previous implementation:
 *  - the string offset uses [] instead of the {} syntax that PHP 8 rejects;
 *  - rows with fewer than four hex words (the last row of a section whose
 *    size is not a multiple of 16) are decoded instead of silently dropped.
 *
 * @param string $data    Output of objdump.
 * @param string $content Byte buffer; decoded bytes are appended to it.
 * @param int    $dOffset Set to the address of the first decoded row while it is still 0.
 *
 * @return void
 */
function parseDataHex($data, &$content, &$dOffset){
    foreach(explode("\n", $data) as $line){
        if($line === "" or $line[0] !== " "){
            continue;
        }
        // <space><address><space><1-4 hex words, each followed by a space or the line end>
        if(preg_match('/^ ([0-9a-f]+) ((?:[0-9a-f]+(?: |$)){1,4})/', $line, $matches) !== 1){
            continue;
        }
        if($dOffset === 0){
            $dOffset = hexdec($matches[1]);
        }
        foreach(explode(" ", rtrim($matches[2])) as $word){
            $content .= hex2bin($word);
        }
    }
}
/**
 * Slices a table out of the global $initializedData buffer.
 *
 * $offset is an address; it is made relative to the buffer by subtracting
 * the global $initializedDataOffset.
 *
 * @param int      $offset     Address of the table.
 * @param int|null $length     Number of bytes to return. When null, slots 1..512
 *                             (POINTER_SIZE bytes each) are scanned and the table
 *                             ends before the first all-zero slot; if none is
 *                             found the length stays 0.
 * @param bool     $ignoreNull When false, the first slot must be all zero bytes,
 *                             otherwise null is returned.
 *
 * @return string|null
 */
function getInitializedDataTable($offset, $length = null, $ignoreNull = false){
    global $initializedDataOffset, $initializedData;

    $nullSlot = str_repeat("\x00", POINTER_SIZE);
    $start = $offset - $initializedDataOffset;

    if(!$ignoreNull){
        $firstSlot = substr($initializedData, $start, POINTER_SIZE);
        if($firstSlot !== $nullSlot){
            return null;
        }
    }

    if($length === null){
        $length = 0;
        $slot = 1;
        while($slot <= 512){
            $candidate = substr($initializedData, $start + $slot * POINTER_SIZE, POINTER_SIZE);
            if($candidate === $nullSlot){
                $length = $slot * POINTER_SIZE;
                break;
            }
            ++$slot;
        }
    }

    return substr($initializedData, $start, $length);
}
/**
 * Slices a table out of the global $rodata buffer.
 *
 * $offset is an address; it is made relative to the buffer by subtracting
 * the global $rodataOffset.
 *
 * @param int      $offset     Address of the table.
 * @param int|null $length     Number of bytes to return. When null, slots 1..512
 *                             (POINTER_SIZE bytes each) are scanned and the table
 *                             ends before the first all-zero slot; if none is
 *                             found the length stays 0.
 * @param bool     $ignoreNull When false, the first slot must be all zero bytes,
 *                             otherwise null is returned.
 *
 * @return string|null
 */
function getDataTable($offset, $length = null, $ignoreNull = false){
    global $rodataOffset, $rodata;

    $nullSlot = str_repeat("\x00", POINTER_SIZE);
    $start = $offset - $rodataOffset;

    if(!$ignoreNull){
        $firstSlot = substr($rodata, $start, POINTER_SIZE);
        if($firstSlot !== $nullSlot){
            return null;
        }
    }

    if($length === null){
        $length = 0;
        $slot = 1;
        while($slot <= 512){
            $candidate = substr($rodata, $start + $slot * POINTER_SIZE, POINTER_SIZE);
            if($candidate === $nullSlot){
                $length = $slot * POINTER_SIZE;
                break;
            }
            ++$slot;
        }
    }

    return substr($rodata, $start, $length);
}
/**
 * Splits a raw table into POINTER_SIZE-byte little-endian entries.
 *
 * Each entry is decoded as an unsigned little-endian integer and masked with
 * ~(POINTER_SIZE - 1), which clears the low bits below the pointer size.
 *
 * Fixes over the previous implementation:
 *  - with POINTER_SIZE 8 the full 64 bits are decoded ("P"); the old code
 *    always used the 32-bit "V" format and lost the upper half;
 *  - a chunk too short to decode yields 0 directly (the value the old code
 *    ended up with) instead of passing truncated input to unpack().
 *
 * @param string $str Raw table bytes.
 *
 * @return int[] Decoded entries, indexed by slot number.
 */
function parseTypeInfo($str){
    $is64Bit = POINTER_SIZE === 8;
    $format = $is64Bit ? "P" : "V";
    $needed = $is64Bit ? 8 : 4;
    $mask = ~(POINTER_SIZE - 1);

    $data = [];
    foreach(str_split($str, POINTER_SIZE) as $i => $d){
        $data[$i] = strlen($d) < $needed ? 0 : unpack($format, $d)[1] & $mask;
    }
    return $data;
}
/**
 * Splits a raw vtable into POINTER_SIZE-byte little-endian entries.
 *
 * Each entry is decoded as an unsigned little-endian integer and masked with
 * ~(POINTER_SIZE - 1), which clears the low bits below the pointer size
 * (presumably so that entries match the symbol addresses they are looked up
 * against - confirm against the target ABI).
 *
 * Fixes over the previous implementation:
 *  - with POINTER_SIZE 8 the full 64 bits are decoded ("P"); the old code
 *    always used the 32-bit "V" format and lost the upper half;
 *  - a chunk too short to decode yields 0 directly (the value the old code
 *    ended up with) instead of passing truncated input to unpack().
 *
 * @param string $str Raw vtable bytes.
 *
 * @return int[] Decoded entries, indexed by slot number.
 */
function parseVtable($str){
    $is64Bit = POINTER_SIZE === 8;
    $format = $is64Bit ? "P" : "V";
    $needed = $is64Bit ? 8 : 4;
    $mask = ~(POINTER_SIZE - 1);

    $data = [];
    foreach(str_split($str, POINTER_SIZE) as $i => $d){
        $data[$i] = strlen($d) < $needed ? 0 : unpack($format, $d)[1] & $mask;
    }
    return $data;
}
// Command line handling. --input and --output are mandatory and the input
// file must exist; otherwise the help screen is shown and the script stops.
$opts = getopt("", ["input:", "output:", "dwarf", "struct", "asm", "help", "pointer-size:"]);
if($opts === false){
    // getopt() reports failure with false; treat it like "no options given".
    $opts = [];
}
if(!isset($opts["input"]) or !isset($opts["output"]) or !file_exists($opts["input"])){
    $opts["help"] = true;
}
if(isset($opts["help"])){
    echo "Minecraft: Pocket Edition header extractor" . PHP_EOL;
    echo "\tversion ". VERSION . PHP_EOL;
    echo PHP_EOL;
    // Fixed: the binary, the script name and the first option used to be
    // glued together without separating spaces.
    echo "Usage: ". PHP_BINARY . " " . $argv[0] . " --input libminecraft.so --output /output/path [options]" . PHP_EOL;
    echo "\t--input => libminecraft.so file path" . PHP_EOL;
    echo "\t--output => output directory for headers" . PHP_EOL;
    echo "\t--pointer-size => sets the pointer size (default 4)" . PHP_EOL;
    echo "\t--struct => use struct generation files" . PHP_EOL;
    echo "\t--dwarf => use DWARF symbols if available" . PHP_EOL;
    echo "\t--asm => include method usage information and assembly" . PHP_EOL;
    echo "\t--help => Show this screen" . PHP_EOL;
    echo PHP_EOL;
    die();
}
// Runtime configuration derived from the command line.
ini_set("memory_limit", -1);
define("USE_STRUCTS", isset($opts["struct"]));
define("USE_ASM", isset($opts["asm"]));
define("POINTER_SIZE", isset($opts["pointer-size"]) ? (int) $opts["pointer-size"] : 4);
$path = $opts["input"];
$out = $opts["output"] . "/";

// The input path comes straight from the command line; quote it once so
// paths containing spaces or shell metacharacters reach objdump intact.
$quotedPath = escapeshellarg($path);

echo "Getting symbol information..." . PHP_EOL;
$result = runCommand("objdump --dynamic-syms --demangle=auto {$quotedPath}");

echo "Getting initialized .data.rel.ro information..." . PHP_EOL;
$initializedData = "";
$initializedDataOffset = 0;
parseDataHex(runCommand("objdump -s -j .data.rel.ro {$quotedPath}"), $initializedData, $initializedDataOffset);
$initializedDataLookup = [];
$typeinfoLookup = [];

echo "Getting .rodata information..." . PHP_EOL;
$rodata = "";
$rodataOffset = 0;
parseDataHex(runCommand("objdump -s -j .rodata {$quotedPath}"), $rodata, $rodataOffset);

$methodLookup = [];
$dwarf = "";

// $asm maps a demangled symbol name to [instruction address => instruction text].
$asm = [];
if(USE_ASM){
    echo "Getting ASM information..." . PHP_EOL;
    // 0 = waiting for a "<address> <symbol>:" header, 1 = inside a symbol's listing.
    $state = 0;
    $entry = "";
    $entries = [];
    foreach(explode("\n", runCommand("objdump --demangle=auto --disassemble-zeroes --source {$quotedPath}")) as $line){
        $line = trim($line);
        if($line === ""){
            // A blank line ends the current listing.
            if(count($entries) > 0){
                $asm[$entry] = $entries;
                $entries = [];
                $entry = "";
            }
            $state = 0;
            continue;
        }
        if($state === 0 and preg_match("/^([0-9a-f]+) <(.*)>:$/", $line, $matches) > 0){
            $state = 1;
            $entry = $matches[2];
        }elseif($state === 1 and preg_match("/^([0-9a-f]+):[ \t]+([0-9a-f]+.*)$/", $line, $matches) > 0){
            $entries[$matches[1]] = trim($matches[2]);
        }
    }
    // Keep a listing that was not followed by a blank line.
    if(count($entries) > 0){
        $asm[$entry] = $entries;
    }
}
// Create the output directory; failures (e.g. it already exists) are suppressed.
@mkdir($out, 0777);
// $contents maps a class name to its member declarations (each stored as
// key and value). Everything that cannot be attributed to a class is
// collected under "common_header".
$contents = [
"common_header" => []
];
// $indexLookup[class][member] holds the raw bytes of a .rodata member, or the
// symbol size (int) of a .bss member.
$indexLookup = [];
$structInfo = [];
if(USE_STRUCTS){
$structInfo = parseStruct($out . "struct.json");
}
// Walk the dynamic symbol table printed by objdump, one symbol per line.
foreach(explode("\n", $result) as $line){
// Captures: 1 = address, 2 = section name (starts with "."), 3 = size,
// 4 = optional " Base " marker, 5 = demangled symbol name.
if(preg_match("/([0-9a-f]+)[a-zA-Z \t]+(\\.[a-z\\.]+)[ \t]+([0-9a-f]+)( Base |) (.*)/", trim($line), $matches) > 0){
$c = trim($matches[5]);
// Symbols in .rodata are read from $rodata, everything else from $initializedData.
$targetCall = ($matches[2] === ".rodata" ? "getDataTable" : "getInitializedDataTable");
if(substr($c, 0, 6) === "vtable"){
$info = $targetCall(hexdec($matches[1]), hexdec($matches[3]));
if($info !== null){
$data = parseVtable($info);
// Drops the first 11 characters of the symbol name (presumably the
// "vtable for " prefix) to obtain the class name.
$initializedDataLookup[fixName(substr($c, 11))] = $data;
}
continue;
}
if(substr($c, 0, 13) === "typeinfo name"){
// The data is fetched but not stored anywhere; the code that would have
// used it is commented out below.
$info = $targetCall(hexdec($matches[1]), hexdec($matches[3]), true);
if($info !== null){
$info = ltrim($info, "0123456789");
//var_dump($info);
//die();
//$data = parseTypeInfo($info);
//$typeInfoLookup[fixName(substr($c, 13))] = $data;
}
continue;
}elseif(substr($c, 0, 8) === "typeinfo"){
// $info is not read again before the next symbol overwrites it; the symbol
// then continues through the generic handling below.
$info = $targetCall(hexdec($matches[1]), hexdec($matches[3]), true);
}
// Drop a leading "void " from the symbol name.
if(substr($c, 0, 5) === "void "){
$c = substr($c, 5);
}
// Names starting with "std::" or "__" go to the common header.
if(substr($c, 0, 5) === "std::"
or substr($c, 0, 2) === "__"){
$contents["common_header"][$c] = $c;
continue;
}
// Symbols without any scope separator are ignored.
if(strpos($c, "::") === false){
continue;
}
// Remember which symbol lives at this address (used to resolve vtable slots).
$methodLookup[$off = hexdec($matches[1])] = $c;
// Split the part before the parameter list at "::": the last component is
// the member name, everything before it the owning class.
$d = explode("::", ($pos = strpos($c, "(")) !== false ? substr($c, 0, $pos) : $c);
$second = trim(array_pop($d));
$first = trim(implode("::", $d));
if($first === ""){
$contents["common_header"][$c] = $c;
continue;
}
// Strip the "Class::" prefix from the declaration, then normalise the class name.
$c = substr($c, strlen($first) + 2);
$first = fixName($first);
if(!isset($contents[$first])){
$contents[$first] = [];
$indexLookup[$first] = [];
}
$contents[$first][$c] = $c;
if($matches[2] === ".rodata"){
// Constant data: keep the bytes so the value can be printed later.
$indexLookup[$first][$c] = substr($rodata, hexdec($matches[1]) - $rodataOffset, hexdec($matches[3]));
}elseif($matches[2] === ".bss"){
// Uninitialised data: only the size is known.
$indexLookup[$first][$c] = hexdec($matches[3]);
}
}
}
// Header generation.
//
// Classes are written in dependency order: a class is deferred until every
// class it depends on (owners of methods referenced from its vtable, and
// class-typed fields from the struct file) has been written. Passes repeat
// until everything is written or a pass makes no progress.
//
// Fixes over the previous implementation:
//  - string offsets use [] instead of the {} syntax that PHP 8 rejects;
//  - classes that were already written are skipped on later passes; they used
//    to be regenerated every pass, which kept $lastCount above zero and made
//    the loop endless whenever some class could never be resolved;
//  - the #include list of the current class is sorted (the old code sorted
//    the outer per-class array, which had no effect on the include order);
//  - class definitions are terminated with "};" as C++ requires;
//  - every header file handle is closed once the file is written.
//
// NOTE(review): parent detection splits the full demangled name at "::", so a
// method whose parameter list itself contains "::" never matches a class
// here. This is left unchanged on purpose; confirm whether it is intended.

// Per-class bookkeeping, all keyed by class name.
$processedFiles = []; // classes whose header has been written
$override = [];       // members that also exist in a detected parent
$headers = [];        // includes required by the class
$parents = [];        // detected parent classes
do{
    $lastCount = 0;
    foreach($contents as $n => $list){
        if(isset($processedFiles[$n])){
            continue;
        }
        $hName = fixIncludeName($n);
        asort($list);
        $override[$n] = [];
        $headers[$n] = [];
        $parents[$n] = [];
        $continue = true;

        // Resolve each vtable slot to the symbol at that address; a slot owned
        // by another known class marks that class as a parent.
        if(isset($initializedDataLookup[$n])){
            foreach($initializedDataLookup[$n] as $p){
                if(!isset($methodLookup[$p])){
                    continue;
                }
                $d = explode("::", $methodLookup[$p]);
                array_pop($d);
                $first = trim(implode("::", $d));
                if($first !== $n and isset($contents[$first])){
                    if(!isset($processedFiles[$first])){
                        $continue = false;
                        break;
                    }
                    $parents[$n][$first] = $first;
                    $headers[$n][$first] = $first;
                    foreach($list as $k){
                        if(isset($contents[$first][$k])){
                            $override[$n][$k] = true;
                        }
                    }
                }
            }
        }
        if(!$continue){
            // Re-queue the class at the end so it is retried on the next pass.
            unset($contents[$n]);
            $contents[$n] = $list;
            continue;
        }

        if(strpos($hName, "/") !== false){
            @mkdir(dirname($out . $hName), 0777, true);
        }

        // Field layout of the class, taken from the struct file when available.
        if(!isset($structInfo[$n])){
            $structInfo[$n] = [];
        }
        if(!isset($structInfo[$n][$n])){
            $classInfo = [
                "__size" => 0
            ];
            $structInfo[$n][$n] = $classInfo;
            if(USE_STRUCTS){
                echo "\rNew class $n";
            }
        }else{
            $classInfo = $structInfo[$n][$n];
            if(!isset($classInfo["__size"])){
                $classInfo["__size"] = 0;
            }
        }

        // Sum up the size of all known fields.
        $totalSize = 0;
        foreach($classInfo as $v => $k){
            if($v === "__size"){
                continue;
            }
            if($k["pointer"]){
                $totalSize += POINTER_SIZE;
                continue;
            }
            $size = 0;
            switch($k["type"]){
                case "bool":
                case "char":
                case "unsigned char":
                    $size = 1;
                    break;
                case "short":
                case "unsigned short":
                    $size = 2;
                    break;
                case "int":
                case "unsigned int":
                case "float":
                    $size = 4;
                    break;
                case "long double":
                case "double":
                    $size = 8;
                    break;
                default:
                    if($k["type"] !== $n and isset($contents[$k["type"]]) and !isset($processedFiles[$k["type"]])){
                        // The field's class has not been written yet: defer this
                        // class. Nothing computed so far is kept, so stop here.
                        $continue = false;
                        break 2;
                    }elseif(isset($contents[$k["type"]])){
                        $headers[$n][$k["type"]] = $k["type"];
                        $size = $structInfo[$k["type"]][$k["type"]]["__size"];
                    }
            }
            if($k["array"]){
                if($k["array"] === "n"){
                    // Open-ended arrays do not contribute to the size.
                    continue;
                }
                $totalSize += $size * $k["array"];
            }else{
                $totalSize += $size;
            }
        }
        if(!$continue){
            unset($contents[$n]);
            $contents[$n] = $list;
            continue;
        }
        if($classInfo["__size"] !== 0 and $totalSize !== $classInfo["__size"]){
            echo "\rClass $n size differs, old: ".$classInfo["__size"].", new $totalSize" . PHP_EOL;
        }else{
            $classInfo["__size"] = $totalSize;
        }

        //Correct shared parents: drop a parent that another parent already inherits from
        foreach($parents[$n] as $p){
            foreach($parents[$n] as $p2){
                if(isset($parents[$p2][$p])){
                    unset($parents[$n][$p]);
                }
            }
        }

        echo "\rGenerating $hName.h ";
        $fp = fopen($out . $hName . ".h", "w");
        if(!is_resource($fp)){
            echo "\rCouldn't create $hName.h! " . PHP_EOL;
            continue;
        }

        // Collect includes for every parameter type used by the methods.
        foreach($list as $name){
            if(($pos = strpos($name, "(")) === false){
                continue;
            }
            $parameter = "";
            $len = strlen($name);
            $cnt = 0; // nesting depth of template angle brackets
            for(; $pos < $len; ++$pos){
                $c = $name[$pos];
                $closing = ($c === ")" && $cnt === 0);
                if(!$closing){
                    if($c === "<"){
                        ++$cnt;
                    }elseif($c === ">"){
                        --$cnt;
                    }
                }
                if($closing || ($c === "," && $cnt === 0)){
                    // End of one parameter: register the header it needs.
                    $parameter = fixName($parameter);
                    if($parameter !== "" and $parameter !== $n){
                        if(isset($contents[$parameter])){
                            $headers[$n][$parameter] = $parameter;
                        }elseif(isset($baseHeaders[$parameter])){
                            $headers[$n][$baseHeaders[$parameter]] = $baseHeaders[$parameter];
                        }
                    }
                    if($closing){
                        break;
                    }
                    $parameter = "";
                }else{
                    $parameter .= $c;
                }
            }
        }

        $sortedHeaders = $headers[$n];
        asort($sortedHeaders);
        foreach($sortedHeaders as $h){
            if($h[0] === "<"){
                fwrite($fp, "#include ".$h."\n");
            }else{
                $h = fixIncludeName($h);
                fwrite($fp, "#include \"".$h.".h\"\n");
            }
        }
        fwrite($fp, "\n");

        if($n !== "common_header"){
            if(count($parents[$n]) === 0){
                fwrite($fp, "class $n{\n");
            }else{
                $class = "class $n :";
                foreach($parents[$n] as $p){
                    $class .= "\n\tpublic $p,";
                }
                // Replace the trailing comma with the opening brace.
                $class = substr($class, 0, -1) . " {\n";
                fwrite($fp, $class);
            }
            fwrite($fp, "\n\tpublic:\n");
            $visibility = "public";
            foreach($classInfo as $v => $k){
                if($v === "__size"){
                    fwrite($fp, "\t//Total size: $k\n");
                }else{
                    if($k["visibility"] !== $visibility){
                        $visibility = $k["visibility"];
                        fwrite($fp, "\n\t$visibility:\n");
                    }
                    fwrite($fp, "\t\t". printType($k, $v) . ";\n");
                }
            }
            if($visibility !== "public"){
                fwrite($fp, "\n\tpublic:\n");
            }
            fwrite($fp, "\n");
        }

        foreach($list as $name){
            $realName = $name;
            if(!isset($structInfo[$n][$name])){
                $typeInfo = [
                    "type" => "void",
                    "pointer" => 0,
                    "reference" => 0,
                    "array" => 0,
                    "const" => false,
                    "visibility" => "public"
                ];
                $structInfo[$n][$name] = $typeInfo;
                if(USE_STRUCTS){
                    echo "\rNew method $n::$name\n";
                }
            }else{
                $typeInfo = $structInfo[$n][$name];
            }
            $extra = ";";
            if(($pos = strpos($name, "(")) === false){
                // Data member: print its value or size when known.
                $name = "static const " . printType($typeInfo, $realName);
                if(isset($indexLookup[$n][$realName])){
                    if(is_int($indexLookup[$n][$realName])){
                        $extra .= " //length ". ($len = $indexLookup[$n][$realName]);
                    }else{
                        $name .= " = " . printData($indexLookup[$n][$realName], $typeInfo);
                        $extra .= " //length ". ($len = strlen($indexLookup[$n][$realName]));
                    }
                    if($typeInfo["type"] === "void" and $classInfo["__size"] > 0 and $len > 0){
                        if($len === $classInfo["__size"]){
                            $extra .= " (possible type $n, length matches)";
                        }elseif($len % $classInfo["__size"] === 0){
                            $extra .= " (possible type ".$n."[".($len / $classInfo["__size"])."], length + modulo matches)";
                        }
                    }
                }
            }else{
                // Method: optionally attach the disassembly as a commented body.
                if(USE_ASM){
                    foreach([$n."::".$realName, "void " . $n."::".$realName] as $asmKey){
                        if(isset($asm[$asmKey])){
                            $extra = "{\n";
                            foreach($asm[$asmKey] as $h => $c){
                                $extra .= "\t\t// @$h: $c\n";
                            }
                            $extra .= "\t}";
                            break;
                        }
                    }
                }
                // Rebuild the signature with a generated name (var0, var1, ...)
                // after each parameter type.
                $first = $newName = substr($name, 0, $pos);
                $len = strlen($name);
                $cnt = 0;
                $start = $pos;
                $var = 0;
                for(; $pos < $len; ++$pos){
                    $c = $name[$pos];
                    if($c === ")" && $cnt === 0){
                        if($pos - $start > 2){
                            $newName .= " var$var";
                        }
                        break;
                    }
                    if($c === "<"){
                        ++$cnt;
                    }elseif($c === ">"){
                        --$cnt;
                    }
                    if($c === "," and $cnt === 0){
                        $newName .= " var$var";
                        ++$var;
                    }
                    $newName .= $c;
                }
                if($first !== $n){
                    $name = printType($typeInfo, $newName . substr($name, $pos));
                }else{
                    // The member is named like the class: no return type is printed.
                    $name = $newName . substr($name, $pos);
                }
                if(isset($override[$n][$realName])){
                    $extra = " override$extra";
                }
            }
            fwrite($fp, "\t" . $name . $extra . "\n\n");
        }
        if($n !== "common_header"){
            fwrite($fp, "};\n");
        }
        fclose($fp);

        $structInfo[$n][$n] = $classInfo;
        $processedFiles[$n] = true;
        ++$lastCount;
    }
}while(count($processedFiles) < count($contents) and $lastCount > 0);

if(USE_STRUCTS){
    file_put_contents($out . "struct.json", json_encode($structInfo, JSON_PRETTY_PRINT));
}
echo PHP_EOL;
@SOF3
Copy link

SOF3 commented Sep 25, 2016

C++ madness. This explains the mess of isSolid etc methods in PocketMine?

image

image

image

image

image

image

image

@dktapps
Copy link

dktapps commented Sep 26, 2016

Why now? After all this time

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment