-
-
Save 0xced/937908 to your computer and use it in GitHub Desktop.
#import <Foundation/Foundation.h> | |
#import <objc/runtime.h> | |
// Returns the constant string @"pear".
// Nothing in the visible code calls this function; it exists so that its
// machine code sits in the binary near banana().
static id pear() | |
{ | |
// NOTE(review): per the author's notes, this nop was added so that pear()
// lands at an address that is a multiple of 4 -- do not remove or reorder.
__asm__("nop"); | |
return @"pear"; | |
} | |
// Variadic function with the (id self, SEL _cmd, ...) shape of a method
// implementation. main() installs it as the implementation of -[Fruit apple].
// Ignores all of its arguments and returns the constant string @"banana".
static id banana(id self, SEL _cmd, ...) | |
{ | |
return @"banana"; | |
} | |
// Minimal NSObject subclass with no public interface of its own.
@interface Fruit : NSObject | |
@end | |
@implementation Fruit | |
// Non-variadic instance method; as compiled it returns the constant
// string @"apple".
- (id) apple | |
{ | |
return @"apple"; | |
} | |
@end | |
// Replaces the implementation of -[Fruit apple] with the variadic banana()
// function, then sends -apple to a Fruit instance and prints the string it
// returns. Always returns 0.
int main (int argc, char const *argv[]) | |
{ | |
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; | |
// Sink for the fprintf below; the return value of fopen is not checked.
FILE *devnull = fopen("/dev/null", "w"); | |
// Look up -apple on Fruit and point it at banana() instead.
Method apple = class_getInstanceMethod([Fruit class], @selector(apple)); | |
method_setImplementation(apple, banana); | |
Fruit *fruit = [[[Fruit alloc] init] autorelease]; | |
// The format string is 36 characters long, so this fprintf returns 36.
// NOTE(review): the accompanying explanation relies on this call coming
// immediately before [fruit apple] -- keep the statement order as is.
fprintf(devnull, "(0x100000c3c - 0x100000bac) / 4 = 36"); | |
// Non-variadic call site that now dispatches to the variadic banana().
printf("%s\n", [[fruit apple] UTF8String]); | |
fclose(devnull); | |
[pool drain]; | |
return 0; | |
} |
# $ gcc --version | |
# i686-apple-darwin10-gcc-4.2.1 (GCC) 4.2.1 (Apple Inc. build 5666) (dot 3) | |
# Copyright (C) 2007 Free Software Foundation, Inc. | |
# This is free software; see the source for copying conditions. There is NO | |
# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
# | |
# $ clang --version | |
# Apple clang version 1.7 (tags/Apple/clang-77) (based on LLVM 2.9svn) | |
# Target: x86_64-apple-darwin10 | |
# Thread model: posix | |
# Builds fruit.m once with gcc and once with clang (x86_64, no optimization
# flags), producing fruit-gcc and fruit-clang; `run` builds and executes both.
# None of the targets name a file they create, so declare them phony: a stray
# file called `all`, `clean` or `run` must not suppress the recipes.
.PHONY: all clean run

# Compile the same source with both compilers. Recipe lines must start with
# a tab character.
all:
	gcc -arch x86_64 fruit.m -o fruit-gcc -framework Foundation
	clang -arch x86_64 fruit.m -o fruit-clang -framework Foundation

# Remove both built binaries.
clean:
	rm -f fruit-gcc fruit-clang

# Build, then run the gcc binary followed by the clang binary.
run: all
	./fruit-gcc
	./fruit-clang
The path to gcc's unexpected result is in two parts. First, the ObjC runtime shenanigans lead to a non-variadic call site that calls a variadic implementation, which leaves one register value uninitialized. Second, gcc's codegen performs an out-of-bounds jump when given that uninitialized value, which happens to jump to instructions that return the wrong value without crashing.
On x86_64, a call site for a variadic function (one with `...` in the parameter list) sets register `%al` to the number of XMM registers that hold parameter values. There are eight XMM parameter registers, so this value ought to be between 0 and 8. Non-variadic call sites don't need to set `%al`.
The call site in question is `[fruit apple]`. The compiler sees that the method signature for `-apple` is non-variadic, so the code generated for the call `[fruit apple]` does not set `%al`. Its value happened to be 36, left over from the return value of the preceding `fprintf` call.
At runtime the call site `[fruit apple]` instead jumps to the implementation of `-banana`, thanks to the earlier `method_setImplementation` call. `-banana` is variadic and expects `%al` to have been set by the caller. It wasn't.
gcc-4.2.1's prologue for `-banana` used a computed branch to save the XMM registers. It looks something like this:
jump end - %al * 4; // each store instruction is 4 bytes long
end-32: store %xmm7;
end-28: store %xmm6;
end-24: store %xmm5;
end-20: store %xmm4;
end-16: store %xmm3;
end-12: store %xmm2;
end-8: store %xmm1;
end-4: store %xmm0;
end: ...
This works fine when `%al` is between 0 and 8 as expected, but when `%al` is 36 it jumps backwards by 36 × 4 = 144 bytes. In this case it happened to jump into `-pear` at an instruction that returned `@"pear"` successfully.
This behavior is of course extremely fragile. Turning on compiler optimizations probably removes the XMM-saving prologue entirely. Almost any code change will lead to a different value in `%al` at the call site or emit different instructions at the destination of the wild jump. Ending up with code that returns the wrong value without crashing was an unlikely accident.
clang-1.7's variadic prologue was different. It simply checks whether `%al` is zero and saves either zero or all eight XMM registers. It doesn't care if `%al` is too big.
Result with clang:
banana
Result with gcc:
pear
Explanation: I don't really remember, it was 9 years ago, I should have written it at that time! It has something to do with how variadic functions are called in the x64 ABI. The `%rax` register is clobbered by the `fprintf` call with the value `36` (the length of the string `"(0x100000c3c - 0x100000bac) / 4 = 36"`). I think `banana` was at address `0x100000c3c` and `pear` was at address `0x100000bac`. I remember adding the `__asm__("nop");` so that the `pear` function would be at an address which is a multiple of 4.