-
-
Save nikic/98357b71fd67756b0f064c9517b62a34 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php declare(strict_types=1);

/**
 * Usage: php opaquify.php file|dir [file|dir...]
 *
 * Options (may appear anywhere among the arguments):
 *   -f, --force       Convert a file even if it carries no
 *                     -no-opaque-pointers / -opaque-pointers=0 flag.
 *   --keep-bitcasts   Do not remove bitcast instructions.
 *
 * Tests that fail afterwards usually fall into two categories:
 *  - opaquify.php produced malformed IR. This usually happens because bitcasts
 *    referenced by PHI nodes were dropped. This requires manually updating the
 *    PHI argument.
 *  - There is a difference in IR output. These are usually benign differences,
 *    but those are the cases that require human verification.
 */

// Refuse to touch any file if the converter does not pass its own test.
if (!selfTest()) {
    exit(-1);
}

if ($argc < 2) {
    echo "Missing path argument.\n";
    echo "Usage: php opaquify.php file|dir [file|dir...]\n";
    exit(-1);
}

// Increase backtracking limit for some very large tests.
ini_set('pcre.backtrack_limit', '10000000');
ini_set('pcre.recursion_limit', '500000');

$force = false;
$keepBitcasts = false;
$args = [];
foreach (array_slice($argv, 1) as $arg) {
    if ($arg === '-f' || $arg === '--force') {
        $force = true;
    } elseif ($arg === '--keep-bitcasts') {
        $keepBitcasts = true;
    } else {
        $args[] = $arg;
    }
}

foreach (files($args) as $f) {
    $isIR = (bool) preg_match('/\.(?:ll|mir)$/', $f);
    $isC = (bool) preg_match('/\.(?:c|cpp|cl|clcpp|cu|m|mm|mlir|fir)$/', $f);
    if (!$isIR && !$isC) {
        continue;
    }

    $orig = file_get_contents($f);
    if ($orig === false) {
        // Report and move on instead of feeding `false` into the converter.
        fwrite(STDERR, "Failed to read $f\n");
        continue;
    }

    // Drop the flag that requests typed pointers. The longer "-Xclang" form
    // is listed first so that it is removed as a whole.
    $new = $isC
        ? str_replace(['-Xclang -no-opaque-pointers ', '-no-opaque-pointers '], '', $orig)
        : str_replace('-opaque-pointers=0 ', '', $orig);
    if ($new === $orig && !$force) {
        // Only opaquify if the file actually carried such a flag.
        continue;
    }

    $new = opaquify($new, $isIR, $keepBitcasts);
    if ($new !== $orig && file_put_contents($f, $new) === false) {
        fwrite(STDERR, "Failed to write $f\n");
    }
}
/**
 * Converts test contents from typed pointers to opaque pointers.
 *
 * Steps, in order: rewrite pointer types to `ptr`, remangle intrinsic names,
 * drop declarations that became duplicates, strip bitcast / zero-offset GEP
 * constant expressions until nothing changes, and finally (unless
 * $keepBitcasts is set) remove pointer bitcast instructions.
 *
 * @param string $c            Contents to convert.
 * @param bool   $isIR         Forwarded to the pointer-type and bitcast steps,
 *                             which treat IR files and C-family files
 *                             differently.
 * @param bool   $keepBitcasts When true, the instruction-removal step is skipped.
 * @return string The converted contents.
 */
function opaquify(string $c, bool $isIR, bool $keepBitcasts = false): string {
    $c = replaceWithPtrFixpoint($c, $isIR);
    $c = remangleIntrinsics($c);
    $c = removeDuplicateDeclarations($c);
    // Removing one constant expression can expose another (e.g. a bitcast
    // wrapping a GEP), hence the fixpoint iteration.
    $c = replaceToFixpoint($c, fn($c) => removeConstantExprs($c));
    if (!$keepBitcasts) {
        $c = replaceBitcasts($c, $isIR);
    }
    return $c;
}
/**
 * Returns the smaller of two optional offsets.
 *
 * A null argument means "no offset"; the other argument is then returned
 * unchanged (which may itself be null).
 */
function minOffset(?int $off1, ?int $off2): ?int {
    if ($off1 === null) {
        return $off2;
    }
    if ($off2 === null) {
        return $off1;
    }
    return min($off1, $off2);
}
/**
 * Finds the end of the region in which $var keeps its current definition.
 *
 * The result is the smallest byte offset, at or after $startOffset, of any of:
 *  - the next assignment to $var (`$var`, optional whitespace, `=`),
 *  - the next occurrence of "define ",
 *  - the next occurrence of "...\n---".
 *
 * @param string $c           Text to search.
 * @param string $var         Variable name, matched literally.
 * @param int    $startOffset Byte offset at which the search starts.
 * @return int|null The offset found, or null if none of the three occurs.
 */
function findNextAssign(string $c, string $var, int $startOffset): ?int {
    $candidates = [];

    $assignRegex = '/' . preg_quote($var, '/') . '\s*=/';
    if (preg_match($assignRegex, $c, $matches, PREG_OFFSET_CAPTURE, $startOffset)) {
        $candidates[] = $matches[0][1];
    }

    foreach (['define ', "...\n---"] as $boundary) {
        $pos = strpos($c, $boundary, $startOffset);
        // Compare against false explicitly: offset 0 is a valid position and
        // must not be mistaken for "not found".
        if ($pos !== false) {
            $candidates[] = $pos;
        }
    }

    return $candidates === [] ? null : min($candidates);
}
/**
 * Turns a FileCheck variable definition into the form used to reference it.
 *
 * `[[NAME:regex]]` becomes `[[NAME]]`; an optional leading `%` is kept.
 * Any other input is returned unchanged.
 */
function getUseName(string $name): string {
    $isDefinition = preg_match('/^(%?)\[\[([^:]+):.*\]\]$/', $name, $parts) === 1;
    if (!$isDefinition) {
        return $name;
    }
    return sprintf('%s[[%s]]', $parts[1], $parts[2]);
}
/**
 * Runs preg_replace_callback() on a byte range of $input only.
 *
 * Text before $startOffset and from $endOffset onwards is copied through
 * untouched.
 *
 * @param string   $regex       PCRE pattern, including delimiters.
 * @param string   $input       Full text.
 * @param int      $startOffset First byte of the range to rewrite.
 * @param int|null $endOffset   First byte after the range, or null for
 *                              "until the end of $input".
 * @param Closure  $fn          Callback handed to preg_replace_callback().
 * @return string $input with the range rewritten.
 * @throws Exception If PCRE reports an error. Without this check the null
 *                   result would be concatenated as an empty string and the
 *                   whole range would silently disappear.
 */
function replaceInRange(
    string $regex, string $input, int $startOffset, ?int $endOffset,
    Closure $fn
): string {
    $prefix = substr($input, 0, $startOffset);
    $middle = substr(
        $input, $startOffset,
        $endOffset !== null ? $endOffset - $startOffset : null);
    $suffix = $endOffset !== null ? substr($input, $endOffset) : '';

    $replaced = preg_replace_callback($regex, $fn, $middle);
    if ($replaced === null) {
        throw new Exception('PCRE replace failed with: ' . preg_last_error_msg());
    }
    return $prefix . $replaced . $suffix;
}
/**
 * Closes the gap left in `%N` numbering after instruction `%$removedNum`
 * was deleted.
 *
 * Every `%N` with N greater than $removedNum is rewritten to `%(N-1)`. The
 * rewrite covers the text from $startOffset up to the next occurrence of
 * "define" or "...\n---", whichever comes first, or to the end of $c if
 * neither occurs.
 *
 * @param string $c           Full text.
 * @param int    $removedNum  Number of the value that was removed.
 * @param int    $startOffset Byte offset where renumbering starts.
 * @return string The text with the affected numbers decremented.
 */
function renumberInstructions(
    string $c, int $removedNum, int $startOffset
): string {
    // Compare against false explicitly: offset 0 is a valid boundary and must
    // not be mistaken for "not found".
    $nextDefine = strpos($c, 'define', $startOffset);
    $nextDocument = strpos($c, "...\n---", $startOffset);
    $endOffset = minOffset(
        $nextDefine === false ? null : $nextDefine,
        $nextDocument === false ? null : $nextDocument);

    return replaceInRange(
        '/%(\d+)/', $c, $startOffset, $endOffset,
        function($matches) use ($removedNum) {
            $num = (int) $matches[1];
            if ($num > $removedNum) {
                return '%' . ($num - 1);
            }
            return $matches[0];
        });
}
/**
 * Removes `X = bitcast ptr Y to ...` instructions (and, for IR files,
 * `X = getelementptr ...` instructions whose indices are all zero) and
 * rewrites later uses of X to Y.
 *
 * For each removed instruction:
 *  - uses of the result are replaced up to the offset reported by
 *    findNextAssign();
 *  - if the operand was itself a FileCheck definition (`[[A:...]]`), that
 *    definition is moved to the first later use;
 *  - a `<PREFIX>-NEXT:` directive directly following the removed line is
 *    turned into `<PREFIX>:`, where <PREFIX> is taken from the removed line;
 *  - if the result was a numbered value, later numbered values are shifted
 *    down by one.
 *
 * For non-IR files, remaining check lines that consist only of `bitcast` or
 * `bitcast ptr...` are deleted afterwards.
 *
 * @param string $c    Text to process.
 * @param bool   $isIR Whether $c is an IR file (enables the GEP handling and
 *                     disables the final check-line cleanup).
 * @return string The processed text.
 * @throws Exception If PCRE reports an error while matching or replacing.
 */
function replaceBitcasts(string $c, bool $isIR): string {
    // Don't replace zero-index GEPs in clang tests. These are still getting
    // generated by the frontend.
    $gep = $isIR ? '| getelementptr(?:\s+inbounds)?\s+[^,]+,\s*(?&ptr_ty)\s+(?<arg>[^,]+)(?:\s*,\s*i\d+\s+0)+' : '';
    $regex = <<<REGEX
    /
      (?(DEFINE)
        (?<ptr_ty> ptr(?:\s+addrspace\s*\(\s*\d+\s*\))?)
      )
      ^(?<prefix>.*?) (?<res>\S+|%?\[\[.+\]\]) \h+ = \h+
      (?|
        bitcast\h+(?&ptr_ty)\h+(?<arg>\S+)\h+to\h+.*
        $gep
      )
      (?:\s*,\s*!.*)? # Ignore metadata
      $
    /mx
    REGEX;

    $offset = 0;
    while (true) {
        $found = preg_match($regex, $c, $matches, PREG_OFFSET_CAPTURE, $offset);
        if ($found === false) {
            // Do not mistake a PCRE failure for "no more bitcasts".
            throw new Exception('PCRE match failed with: ' . preg_last_error_msg());
        }
        if ($found === 0) {
            break;
        }

        $startOffset = $matches[0][1];
        $len = strlen($matches[0][0]);
        $prefix = $matches['prefix'][0];
        $res = $matches['res'][0];
        $arg = $matches['arg'][0];
        $resUseName = getUseName($res);
        $argUseName = getUseName($arg);
        $nextAssign = findNextAssign($c, $res, $startOffset + $len);

        if ($argUseName === $arg) {
            // Replace uses of $res with $arg
            $useRegex = '/' . preg_quote($resUseName, '/') . '(?![\w.])/';
            $c = replaceInRange(
                $useRegex, $c, $startOffset + $len, $nextAssign,
                function($m) use ($argUseName) {
                    return $argUseName;
                });
        } else {
            // If the bitcast used to define the argument [[A::%.*]], we need to
            // shift this definition to the first later occurrence of [[A]] (which
            // might be a newly introduced one).
            $useRegex = '/(?:' . preg_quote($resUseName, '/') . '|'
                      . preg_quote($argUseName, '/') . ')(?![\w.])/';
            $placedDef = false;
            $c = replaceInRange(
                $useRegex, $c, $startOffset + $len, $nextAssign,
                function($m) use ($arg, $argUseName, &$placedDef) {
                    if (!$placedDef) {
                        $placedDef = true;
                        return $arg;
                    }
                    return $argUseName;
                });
        }

        // Remove the bitcast itself (the +1 also drops the line terminator).
        $c = substr_replace($c, '', $startOffset, $len + 1);
        // Continue matching at the line that moved into the removed one's place.
        $offset = $startOffset;

        // If CHECK: is followed by CHECK-NEXT:, convert it to CHECK:
        if (preg_match('/(\w+):/', $prefix, $prefixMatch)) {
            $checkName = $prefixMatch[1];
            $nextRegex = '/\G(.*)' . preg_quote($checkName, '/') . '-NEXT:/';
            if (preg_match($nextRegex, $c, $nextMatch, 0, $startOffset)) {
                $c = substr($c, 0, $startOffset + strlen($nextMatch[1])) .
                     $checkName . ':' .
                     substr($c, $startOffset + strlen($nextMatch[0]));
            }
        }

        // Dropping a numbered instruction requires renumbering
        if (preg_match('/%(\d+)/', $res, $numMatch)) {
            $c = renumberInstructions($c, (int) $numMatch[1], $startOffset);
        }
    }

    if (!$isIR) {
        $c = preg_replace('/^.*?[A-Z0-9](?:-NEXT)?:\h+bitcast(?:|\h+ptr.*)\n/m', '', $c);
        if ($c === null) {
            throw new Exception('PCRE replace failed with: ' . preg_last_error_msg());
        }
    }
    return $c;
}
/**
 * Performs one pass of constant-expression cleanup.
 *
 *  - `bitcast (ptr X to ptr)` is replaced by `X`.
 *  - `getelementptr [inbounds] (..., ptr X, iN 0, ...)` with only zero
 *    indices is replaced by `X`.
 *  - `alias <ty>, @g` gains the missing `ptr`: `alias <ty>, ptr @g`.
 *
 * In the first two forms `{{.*}}` is accepted in place of the pointer type.
 * Nested expressions need repeated application; the caller iterates to a
 * fixpoint.
 *
 * @param string $c Text to process.
 * @return string The processed text.
 * @throws Exception If PCRE reports an error.
 */
function removeConstantExprs(string $c): string {
    // Remove bitcast and zero-offset GEP constexprs.
    $regex = <<<'REGEX'
    /
      (?(DEFINE)
        (?<ptr_ty> ptr(?:\s+addrspace\s*\(\s*\d+\s*\))?|\{\{\.\*\}\})
      )
      (?|
        bitcast\s*\(\s*(?&ptr_ty)\s+(?<arg>.+?)\s+to\s+(?&ptr_ty)\s*\)
      | getelementptr(?:\s+inbounds)?\s*\((?:(?:.+?,\s*)?(?&ptr_ty)|\{\{\.\*\}\})\s+(?<arg>[^,]+)(?:\s*,\s*i\d+\s+0)+\)
    )/x
    REGEX;
    $c = preg_replace_callback($regex, fn($m) => $m['arg'], $c);
    if ($c === null) {
        throw new Exception('PCRE replace failed with: ' . preg_last_error_msg());
    }

    // Aliases allow an inconsistent syntax missing the "ptr".
    $c = preg_replace('/alias([^,]+), @/', 'alias$1, ptr @', $c);
    if ($c === null) {
        throw new Exception('PCRE replace failed with: ' . preg_last_error_msg());
    }
    return $c;
}
/**
 * Remangles intrinsic names from typed-pointer to opaque-pointer form,
 * e.g. llvm.xyz.p0i8 to llvm.xyz.p0.
 *
 * Within every `@llvm.`-prefixed name, each `.pN` suffix component
 * (optionally preceded by a `vN` / `nxvN` vector prefix) loses the pointee
 * type mangling that follows it.
 */
function remangleIntrinsics(string $c): string {
    $pointeeSuffix = '/(\.(?:(?:nx)?v\d+)?p\d+)(?:\w*s.+?s(?=\.|$)|[^.]*)/';
    return preg_replace_callback(
        '/@llvm\.[\w.]+/',
        fn(array $hit) => preg_replace($pointeeSuffix, '$1', $hit[0]),
        $c);
}
/**
 * After remangling, we may have multiple redundant declarations; keep only
 * the first of each.
 *
 * Only `declare` lines that name an `llvm.*` intrinsic are considered. Two
 * lines count as duplicates when both the intrinsic name and the optional
 * leading check prefix (`; NAME:` or `// NAME:`) are equal, so declarations
 * under different check prefixes are kept separately.
 */
function removeDuplicateDeclarations(string $c): string {
    $seen = [];
    $keepFirstOnly = function(array $m) use (&$seen): string {
        $key = $m[1] . $m[2];
        if (isset($seen[$key])) {
            return '';
        }
        $seen[$key] = true;
        return $m[0];
    };
    return preg_replace_callback(
        '/^(\h*(?:;|\/\/)\h+\w+:\h*)?declare.*@(llvm\.[\w.]+).*\n/m',
        $keepFirstOnly,
        $c);
}
/**
 * Applies $fn to every `//` comment in $c and leaves everything else alone.
 *
 * A comment spans from `//` to the end of its line; $fn receives the comment
 * text including the `//` and returns its replacement.
 */
function replaceOnlyInComments(string $c, callable $fn): string {
    $rewriteComment = function(array $hit) use ($fn) {
        return $fn($hit[0]);
    };
    return preg_replace_callback('(//.*$)m', $rewriteComment, $c);
}
/**
 * Applies $replaceFn repeatedly until its output equals its input.
 *
 * $replaceFn is always called at least once; the first value it returns
 * unchanged is the result.
 */
function replaceToFixpoint(string $c, Closure $replaceFn): string {
    while (true) {
        $next = $replaceFn($c);
        if ($next === $c) {
            return $c;
        }
        $c = $next;
    }
}
/**
 * Rewrites pointer types to `ptr` until no further change occurs.
 *
 * Each round runs replaceWithPtr() (on the whole text for IR, on `//`
 * comments only otherwise) followed by replacePlaceholderWithPtr().
 */
function replaceWithPtrFixpoint(string $c, bool $isIR): string {
    $oneRound = function(string $text) use ($isIR): string {
        if ($isIR) {
            $text = replaceWithPtr($text);
        } else {
            $text = replaceOnlyInComments($text, 'replaceWithPtr');
        }
        return replacePlaceholderWithPtr($text);
    };
    return replaceToFixpoint($c, $oneRound);
}
/**
 * Performs one pass replacing typed pointer types (`T*`, `T addrspace(N)*`,
 * pointers to pointers, ...) with `ptr` / `ptr addrspace(N)`.
 *
 * FileCheck placeholders (`[[...]]`, `{{...}}`) and `;` comment lines that
 * do not look like check directives are skipped. Only the address space of
 * the outermost pointer is kept. A single pass does not resolve nested
 * pointer types such as function pointers taking pointers; the caller
 * iterates to a fixpoint.
 *
 * @throws Exception If PCRE reports an error.
 */
function replaceWithPtr(string $c): string {
    $pointerTypeRegex = <<<'REGEX'
    /
      \[\[ .*? \]\](*SKIP)(*F)| # Skip FileCheck placeholders
      \{\{ .*? \}\}(*SKIP)(*F)| # Skip FileCheck placeholders
      \n;(?!\h*[\w-]+:).*(*SKIP)(*F)| # Skip code comments
      (?:
        void | half | bfloat | float | double | x86_fp80 | fp128 | ppc_fp128
        | label | metadata | x86_mmx | x86_amx | type | label | opaque | token | ptr
        | i\d+ # Integer type
        | %[\w.-]+ # Named struct type
        | %"[^"]+" # Named struct type
        | (\[ \s* \d+ \s* x [^[\]]* (?: (?-1) [^[\]]* )* \]) # Array type
        | < (?:vscale\s+x\s+)? \d+ \s* x [^<>]* > # Vector type
        | (\{ [^{}]* (?: (?-1) [^{}]* )* \}) # Struct type
        | < (?-1) > # Packed struct type
      )
      (?: \s* (\( [^()]* (?: (?-1) [^()]* )* \)) )? # Function type
      (?: # Ignore intermediate pointer types
        (?: \s* addrspace \s* \( \s* \d+ \s* \) )?
        \s* \*
      )*
      (?<addrspace> \s* addrspace \s* \( \s* \d+ \s* \) )?
      \s* \* # Pointer type
      (?<trailing>.?)
    /x
    REGEX;

    $toOpaque = static function(array $m): string {
        $next = $m['trailing'];
        // `i8 *%x` must not collapse into `ptr%x`: re-insert a separator when
        // the character following the `*` starts an identifier.
        $needsSeparator = $next === '%' || $next === '@' || ctype_alpha($next);
        $separator = $needsSeparator ? ' ' : '';
        return 'ptr' . $m['addrspace'] . $separator . $next;
    };

    $result = preg_replace_callback($pointerTypeRegex, $toOpaque, $c);
    if ($result === null) {
        throw new Exception('PCRE replace failed with: ' . preg_last_error_msg());
    }
    return $result;
}
/**
 * Replaces pointers to FileCheck placeholders (`[[T]]*`, `i[[#N]]*`,
 * `{{...}}*`, `%foo{{...}}*`, each optionally with an address space) by
 * `ptr` / `ptr addrspace(N)`.
 *
 * Handle pointer to placeholders separately, as it conflicts with the
 * placeholder skipping in replaceWithPtr().
 *
 * If the replaced placeholder defined a variable (`[[NAME:regex]]`), the
 * definition would be lost, so it is moved to the next `[[NAME]]` use.
 *
 * @param string $c Text to process.
 * @return string The processed text.
 * @throws Exception If PCRE reports an error while matching.
 */
function replacePlaceholderWithPtr(string $c): string {
    $regex = <<<'REGEX'
    /
      %?
      (?: i?\[\[(?:\#\w+|(?<name>\w+)(?::(?<regex>[^]]+))?)\]\]
        | i?(?:%\w+)?\{\{[^}]+\}\}
      )
      (?<addrspace>(?:\s+ addrspace \s* \( \s* \d+ \s* \) )?)
      \*
    /x
    REGEX;

    $offset = 0;
    while (true) {
        $found = preg_match($regex, $c, $matches, PREG_OFFSET_CAPTURE, $offset);
        if ($found === false) {
            throw new Exception('PCRE match failed with: ' . preg_last_error_msg());
        }
        if ($found === 0) {
            break;
        }

        $startOffset = $matches[0][1];
        $len = strlen($matches[0][0]);
        $replacement = 'ptr' . $matches['addrspace'][0];
        $c = substr_replace($c, $replacement, $startOffset, $len);
        $offset = $startOffset + strlen($replacement);

        $varRegex = $matches['regex'][0] ?? '';
        if ($varRegex !== '') {
            // Move var definition to next use. This is done with plain string
            // functions on purpose: used as a preg_replace() replacement, any
            // backslash or `$N` sequence inside the FileCheck regex would be
            // interpreted rather than copied.
            $name = $matches['name'][0];
            $use = '[[' . $name . ']]';
            $usePos = strpos($c, $use, $offset);
            if ($usePos !== false) {
                $definition = '[[' . $name . ':' . $varRegex . ']]';
                $c = substr_replace($c, $definition, $usePos, strlen($use));
            }
        }
    }
    return $c;
}
/**
 * Lazily yields the file paths designated by $paths.
 *
 * A path for which is_file() holds is yielded as is. Any other path is
 * walked recursively as a directory and the pathname of every leaf entry is
 * yielded. The `.` and `..` pseudo-entries are skipped so that only real
 * directory entries are produced.
 *
 * @param array $paths File and/or directory paths.
 * @return iterable Pathnames as strings.
 */
function files(array $paths): iterable {
    foreach ($paths as $path) {
        if (is_file($path)) {
            yield $path;
            continue;
        }

        $it = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($path, FilesystemIterator::SKIP_DOTS),
            RecursiveIteratorIterator::LEAVES_ONLY
        );
        foreach ($it as $file) {
            yield $file->getPathname();
        }
    }
}
/**
 * Runs opaquify() on built-in fixtures and compares against the expected
 * output.
 *
 * Two fixtures are checked: one processed as IR and one processed as a
 * C-family source file. On the first mismatch the actual result is printed
 * and false is returned.
 *
 * The C fixture ends with an empty line on purpose: check lines consisting
 * only of a pointer bitcast are removed together with their line terminator,
 * so the last such line must be newline-terminated to be removed.
 *
 * @return bool True if all fixtures convert as expected.
 */
function selfTest(): bool {
    $irInput = <<<'INPUT'
i8*
i8 addrspace(1)*
i8 addrspace(1)* addrspace(2)*
i8* (i8*)*
i8 (i8 addrspace(1)*)*
{ i8, { i8 } }*
<vscale x 2 x i32>*
<{ i32, i32 }>*
[[T]]*
[[T:.*]]*
[[T]] addrspace(1)*
i[[#N]]*
i[[N:.*]]*
i[[N]]*
{{.+}}*
i{{32|64}}*
%foo{{.+}}*
i8 *%x
i8 *@x
i8*{{[a-z_ ]*}}
; CHECK: declare void @llvm.foo.p0i8.p1i16() attrs
; CHECK: declare void @llvm.foo.p0i8.p1i32() attrs
// FOO: declare void @llvm.foo.p0i8.p1i32() attrs
declare void @llvm.foo.p0i8.p1i16() attrs
declare void @llvm.foo.p0i8.p1i32() attrs
declare void @llvm.bar.v8p0i16()
declare void @llvm.bar.nxv8p0i16()
declare void @llvm.bar.p256()
declare void @llvm.bar.p0s_struct.s1s.p0s_struct.s1s()
declare void @llvm.bar.p0s_struct.s1s.p0a3s_struct.s1s()
; CHECK-NEXT: [[TMP:%.*]] = phi ptr [ null, [[ENTRY:%.*]] ], [ getelementptr inbounds ([[MYSTRUCT:%.*]], ptr @var, i64 0, i32 0), [[SELECT]] ]
%call = call i8 addrspace(200)* @strcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0))
store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
bitcast ({{.*}} %x to i8*)
getelementptr ({{.*}} %x, i32 0, i32 0)
getelementptr ({{.*}}* %x, i32 0, i32 0)
getelementptr (<{ i32, i32 }>, <{ i32, i32 }>* @_MergedGlobals.1, i32 0, i32 0)
%res = bitcast ptr %arg to ptr, !dbg !0
use(%res)
use(%res_suffix)
%res = something
use(%res)
%1 = bitcast ptr %arg to ptr
%2 = foo
use(ptr %0, ptr %1, ptr %2)
; CHECK: [[PF:%.*]] = bitcast ptr [[P]] to ptr
; CHECK-NEXT: [[V2F:%.*]] = load float, ptr [[PF]], align 4
; CHECK-NEXT: %[[X:%.*]] = bitcast ptr %p to ptr
; CHECK-NEXT: %[[Y:%.*]] = load float, ptr %[[X]], align 4
; CHECK-NEXT: [[X2:%.*]] = bitcast ptr [[X1:%.*]] to ptr
; CHECK-NEXT: load float, ptr [[X1]], align 4
; CHECK-NEXT: load float, ptr [[X2]], align 4
; CHECK-NEXT: load float, ptr [[X2]], align 4
; CHECK-NEXT: [[V:%[^ ]+]] = bitcast ptr %p to ptr
; CHECK-NEXT: [[W:%[^ ]+]] = load float, ptr [[V]], align 4
; CHECK-NEXT: %[[Q:[^ ]+]] = bitcast ptr %p to ptr
; CHECK-NEXT: %[[R:[^ ]+]] = load float, ptr %[[Q]], align 4
%q = bitcast i8* %p to [[AGG:{ i32 }]]*
getelementptr [[AGG]], [[AGG]]* %q, i64 1
[[B:%[a-z]*]]
INPUT;
    $irExpected = <<<'EXPECTED'
ptr
ptr addrspace(1)
ptr addrspace(2)
ptr
ptr
ptr
ptr
ptr
ptr
ptr
ptr addrspace(1)
ptr
ptr
ptr
ptr
ptr
ptr
ptr %x
ptr @x
ptr{{[a-z_ ]*}}
; CHECK: declare void @llvm.foo.p0.p1() attrs
// FOO: declare void @llvm.foo.p0.p1() attrs
declare void @llvm.foo.p0.p1() attrs
declare void @llvm.bar.v8p0()
declare void @llvm.bar.nxv8p0()
declare void @llvm.bar.p256()
declare void @llvm.bar.p0.p0()
; CHECK-NEXT: [[TMP:%.*]] = phi ptr [ null, [[ENTRY:%.*]] ], [ @var, [[SELECT]] ]
%call = call ptr addrspace(200) @strcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str)
store i16 1, ptr @b, align 2
%x
%x
%x
@_MergedGlobals.1
use(%arg)
use(%res_suffix)
%res = something
use(%res)
%1 = foo
use(ptr %0, ptr %arg, ptr %1)
; CHECK: [[V2F:%.*]] = load float, ptr [[P]], align 4
; CHECK-NEXT: %[[Y:%.*]] = load float, ptr %p, align 4
; CHECK-NEXT: load float, ptr [[X1:%.*]], align 4
; CHECK-NEXT: load float, ptr [[X1]], align 4
; CHECK-NEXT: load float, ptr [[X1]], align 4
; CHECK-NEXT: [[W:%[^ ]+]] = load float, ptr %p, align 4
; CHECK-NEXT: %[[R:[^ ]+]] = load float, ptr %p, align 4
getelementptr [[AGG:{ i32 }]], ptr %p, i64 1
[[B:%[a-z]*]]
EXPECTED;
    $cInput = <<<'INPUT'
void *foo(void *);
// CHECK: i8*
// CHECK-NEXT: bitcast ptr {{.*}} to ptr
// CHECK-NEXT: bitcast ptr
// CHECK-NEXT: bitcast
// CHECK-NEXT: bitcast <2 x i32> %x to <4 x i16>
// FOO: bitcast ptr

INPUT;
    $cExpected = <<<'EXPECTED'
void *foo(void *);
// CHECK: ptr
// CHECK-NEXT: bitcast <2 x i32> %x to <4 x i16>

EXPECTED;

    // Label (used in the failure message), input, expected output, isIR.
    $cases = [
        ['IR', $irInput, $irExpected, true],
        ['C', $cInput, $cExpected, false],
    ];
    foreach ($cases as [$label, $input, $expected, $isIR]) {
        $result = opaquify($input, $isIR);
        if ($result !== $expected) {
            echo "Self-test ($label) failed:\n$result\n";
            return false;
        }
    }
    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment