nddrylliog/00_fun.c

## 00_fun.c
#include <assert.h>
#include <stdio.h>

// Returns the sum v[0] + v[1] + v[2].
// v must point to at least three consecutive, readable floats.
float takes_a_vec3(float *v) {
    return v[0]+v[1]+v[2];
}

// Forwards to takes_a_vec3 with the address of the parameter z only;
// x and y are never referenced.
// NOTE: z is a single float, so takes_a_vec3's reads of v[1] and v[2] go past
// the end of that object. This is undefined behavior, and it is deliberate:
// it is the effect this writeup investigates. Do not "fix" it here.
float takes_three_floats(float x, float y, float z) {
    // look ma, no hands!
    return takes_a_vec3(&z);
}

// Asserts that takes_three_floats(1,2,3) compares equal to 6, prints
// "All good." when the assertion holds, and returns 0.
// argc and argv are unused.
int main(int argc, char **argv) {
    // passes. why? i know why. do you? :D
    assert(takes_three_floats(1,2,3) == 6);
    puts("All good.");

    return 0;
}

## 01_prereqs.md

      
    Raw
  

              01_prereqs.md
            
          
    This program is funny with Clang because it works on -O0, crashes on -O1, then works again on -O2. Why?
Let's find out why
Basic info

Everything was compiled on Mac OSX 10.9.3 with:
clang -g -O[0123] fun.c -o fun

And disassembled with
otool -tVv fun

Info:
$ clang --version
Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn)
Target: x86_64-apple-darwin13.2.0
Thread model: posix


## fun-O0.S
fun:
(__TEXT,__text) section

# float takes_a_vec3(float *v)
# In:  %rdi  = v (pointer to the first of three floats)
# Out: %xmm0 = v[0] + v[1] + v[2]
_takes_a_vec3:
## prologue - save the caller's %rbp and make %rbp our own frame pointer
0000000100000e20	pushq	%rbp
0000000100000e21	movq	%rsp, %rbp
## store %rdi (our pointer argument v) in the stack slot at -0x8(%rbp)
0000000100000e24	movq	%rdi, -0x8(%rbp)
## ..and load it straight back, we need it (typical non-optimized code)
0000000100000e28	movq	-0x8(%rbp), %rdi
## load the first float pointed to by %rdi (v[0]) into %xmm0 (floating point register)
0000000100000e2c	movss	(%rdi), %xmm0
## reload v from its stack slot into %rdi - redundant, %rdi already holds it
0000000100000e30	movq	-0x8(%rbp), %rdi
## add the second float, v[1] at offset 0x4, into %xmm0.
## when called from the -O0 takes_three_floats, v[0] is z and v[1] is y, so this is 3.0 + 2.0
0000000100000e34	addss	0x4(%rdi), %xmm0
## another redundant reload of v
0000000100000e39	movq	-0x8(%rbp), %rdi
## add the third float, v[2] at offset 0x8 (x in that same call, so 5.0 + 1.0)
0000000100000e3d	addss	0x8(%rdi), %xmm0
## restore %rbp
0000000100000e42	popq	%rbp
## and return. the result is in %xmm0, which is expected because the function
## returns a float (and we return by register here)
0000000100000e43	ret
## multi-byte no-op. it sits after the ret, so it is not reached by falling through,
## and it does not modify %rax (the operand is only an addressing form).
## presumably padding so that the next function starts at the 16-byte boundary ...e50.
0000000100000e44	nopw	%cs:(%rax,%rax)

# float takes_three_floats(float x, float y, float z)
# In:  %xmm0 = x, %xmm1 = y, %xmm2 = z
# Out: %xmm0 = whatever takes_a_vec3(&z) returned
_takes_three_floats:
## prologue - save %rbp
0000000100000e50	pushq	%rbp
0000000100000e51	movq	%rsp, %rbp
## allocate 16 bytes to store our local variables in.
## the stack grows down - we subtract from %rsp to allocate
0000000100000e54	subq	$0x10, %rsp
## x will live at -0x4(%rbp), y at -0x8, z at -0xc
## take the address of z's slot, put it in %rdi (the pointer argument for the call)
0000000100000e58	leaq	-0xc(%rbp), %rdi
## spill the three arguments from %xmm0/%xmm1/%xmm2 into those stack slots.
## z sits lowest, so 4 bytes above &z is y and 8 bytes above is x - which is
## why reading three floats starting at &z happens to see z, y, x in this build.
0000000100000e5c	movss	%xmm0, -0x4(%rbp)
0000000100000e61	movss	%xmm1, -0x8(%rbp)
0000000100000e66	movss	%xmm2, -0xc(%rbp)
## call takes_a_vec3, which will only use %rdi.
0000000100000e6b	callq	_takes_a_vec3
## free memory, restore stack frame, return.
## return value is in %xmm0, which was modified in takes_a_vec3.
0000000100000e70	addq	$0x10, %rsp
0000000100000e74	popq	%rbp
0000000100000e75	ret
## no-op after the ret; presumably padding up to the next function at ...e80
0000000100000e76	nopw	%cs:(%rax,%rax)

# main - you should know your prototypes!
# int main(int argc, char **argv): argc arrives in %edi, argv in %rsi.
_main:
## prologue, same as in the other functions
0000000100000e80	pushq	%rbp
0000000100000e81	movq	%rsp, %rbp
## allocate 32 bytes of stack for locals
0000000100000e84	subq	$0x20, %rsp
## the C source passes integer literals, so 'integer 1' is moved into the %rax register
0000000100000e88	movabsq	$0x1, %rax
## and promptly converted to single-precision floating point, put into %xmm0 (x)
0000000100000e92	cvtsi2ssq	%rax, %xmm0
## same goes for 'integer 2' (into %xmm1, y) and 'integer 3' (into %xmm2, z).
0000000100000e97	movabsq	$0x2, %rax
0000000100000ea1	cvtsi2ssq	%rax, %xmm1
0000000100000ea6	movabsq	$0x3, %rax
0000000100000eb0	cvtsi2ssq	%rax, %xmm2
## zero the first stack slot, -0x4(%rbp). nothing in this listing reads it back;
## presumably it is the slot the compiler reserves for main's return value.
0000000100000eb5	movl	$0x0, -0x4(%rbp)
## save %edi (argc) and %rsi (argv) at -0x8(%rbp) and -0x10(%rbp)
0000000100000ebc	movl	%edi, -0x8(%rbp)
0000000100000ebf	movq	%rsi, -0x10(%rbp)
## finally, call!
0000000100000ec3	callq	_takes_three_floats
## at this point, we have the result in %xmm0
## move 'integer 6' into %rax, convert it, store it in %xmm1
0000000100000ec8	movabsq	$0x6, %rax
0000000100000ed2	cvtsi2ssq	%rax, %xmm1
## compare %xmm0 (function return value) and %xmm1 (6.0f constant)
0000000100000ed7	ucomiss	%xmm1, %xmm0
## turn the flags ucomiss left behind into a 0/1 value in %rax:
##   %cl = ZF          (set when equal - and also when unordered, i.e. a NaN is involved)
##   %dl = !PF         (PF is set only when unordered)
##   %dl = %cl & %dl   -> 1 only for "equal and not NaN"
##   %dl ^= 1          -> 1 when the assertion FAILS
## then widen to 64 bits: %rax is 0 if the two values were equal, 1 otherwise.
0000000100000eda	sete	%cl
0000000100000edd	setnp	%dl
0000000100000ee0	andb	%cl, %dl
0000000100000ee2	xorb	$0x1, %dl
0000000100000ee5	andb	$0x1, %dl
0000000100000ee8	movzbl	%dl, %edi
0000000100000eeb	movslq	%edi, %rax
## if %rax is 0...
0000000100000eee	cmpq	$0x0, %rax
## ...then skip the assert-failure call: jump to the jmpq at ...f19,
## which continues to the puts call
0000000100000ef4	je	0x100000f19
## otherwise, prepare to call ___assert_rtn with info on
## where the assert was that failed so the programmer knows:
## %rdi = function name, %rsi = file name, %edx = 0xf (line 15), %rcx = expression text
0000000100000efa	leaq	0x6b(%rip), %rdi ## literal pool for: "main"
0000000100000f01	leaq	0x69(%rip), %rsi ## literal pool for: "fun.c"
0000000100000f08	movl	$0xf, %edx
0000000100000f0d	leaq	0x63(%rip), %rcx ## literal pool for: "takes_three_floats(1,2,3) == 6"
0000000100000f14	callq	0x100000f3a ## symbol stub for: ___assert_rtn
0000000100000f19	jmpq	0x100000f1e
## that's where we end up if they were equal
0000000100000f1e	leaq	0x71(%rip), %rdi ## literal pool for: "All good."
0000000100000f25	callq	0x100000f40 ## symbol stub for: _puts
## and we're good! *phew*
## stash puts' return value (%eax) at -0x14(%rbp), then return 0 via %ecx -> %eax
0000000100000f2a	movl	$0x0, %ecx
0000000100000f2f	movl	%eax, -0x14(%rbp)
0000000100000f32	movl	%ecx, %eax
## free the 32 bytes, restore %rbp, return
0000000100000f34	addq	$0x20, %rsp
0000000100000f38	popq	%rbp
0000000100000f39	ret

## fun-O1.s
fun:
(__TEXT,__text) section

# float takes_a_vec3(float *v)
# In:  %rdi  = v
# Out: %xmm0 = v[0] + v[1] + v[2]
_takes_a_vec3:
## prologue - save %rbp, set up the frame pointer
0000000100000ea0	pushq	%rbp
0000000100000ea1	movq	%rsp, %rbp
## load the first float pointed to by %rdi (v[0]) into the FP register %xmm0
0000000100000ea4	movss	(%rdi), %xmm0
## add the second element, v[1] at offset 0x4, to it
0000000100000ea8	addss	0x4(%rdi), %xmm0
## then the third element, v[2] at offset 0x8, to that
0000000100000ead	addss	0x8(%rdi), %xmm0
## then return - sum is in %xmm0
0000000100000eb2	popq	%rbp
0000000100000eb3	ret
## no-op after the ret; presumably padding up to the next function at ...ec0
0000000100000eb4	nopw	%cs:(%rax,%rax)

# float takes_three_floats(float x, float y, float z)
# In:  %xmm2 = z (%xmm0 and %xmm1 are not read here)
# Out: %xmm0 = whatever takes_a_vec3(&z) returned
_takes_three_floats:
## prologue
0000000100000ec0	pushq	%rbp
0000000100000ec1	movq	%rsp, %rbp
## allocate 16 bytes
0000000100000ec4	subq	$0x10, %rsp
## arguments were passed through %xmm0, %xmm1, %xmm2 registers,
## not through the stack! hence, '%xmm2' is our 'float z' here.
## take it and store it in our first 'local variable' stack slot
0000000100000ec8	movss	%xmm2, -0x4(%rbp)
## take the address of that local variable, store it in the pointer register
0000000100000ecd	leaq	-0x4(%rbp), %rdi
## .. you know where this is going. _takes_a_vec3 now has the address
## of a local variable (float, equal to 3.0f) - and there's *nothing good*
## near that local variable on the stack. Definitely not 2.0f and 1.0f:
## the reads at offsets 0x4 and 0x8 from -0x4(%rbp) land on (%rbp) and 0x4(%rbp),
## i.e. the two halves of the saved %rbp that the pushq above stored.
## those addresses are valid stack memory, so nothing faults - it just reads
## garbage and the result of the addition is (most likely) not 6.0f.
0000000100000ed1	callq	_takes_a_vec3
## free memory & return.
0000000100000ed6	addq	$0x10, %rsp
0000000100000eda	popq	%rbp
0000000100000edb	ret
## no-op after the ret; presumably padding up to the next function at ...ee0
0000000100000edc	nopl	(%rax)

# main(blah)
_main:
## prologue
0000000100000ee0	pushq	%rbp
0000000100000ee1	movq	%rsp, %rbp
## don't even bother allocating any stack memory this time - all registers baby
## look at this compiler. It knows takes_three_floats does not use its first two arguments (x and y)
## so it passes only the third, in %xmm2. Note that it didn't change takes_three_floats' prototype.
## if the callee wasn't in the same module, it wouldn't know that and would be forced to pass all three
## arguments, not knowing what's inside. In fact, moving takes_three_floats to another compilation unit
## would force it to pass all args, as long as no LTO (link-time optimization) is enabled.
## (the value loaded from 0x78(%rip) is presumably the 3.0f constant)
0000000100000ee4	movss	0x78(%rip), %xmm2
0000000100000eec	callq	_takes_three_floats
## well, *that* compare is going to fail...
## (the memory operand at 0x70(%rip) is presumably the 6.0f constant)
0000000100000ef1	ucomiss	0x70(%rip), %xmm0
## not equal -> jump to the assert-failure code at ...efc; hence, this jump *will* happen
0000000100000ef8	jne	0x100000efc
## equal and parity clear (not NaN) -> jump to "All good." at ...f24; otherwise fall through
0000000100000efa	jnp	0x100000f24
## and we get a nice error message.
## the three string addresses are loaded into scratch registers first...
0000000100000efc	leaq	0x69(%rip), %rax ## literal pool for: "main"
0000000100000f03	leaq	0x67(%rip), %rcx ## literal pool for: "fun.c"
0000000100000f0a	leaq	0x66(%rip), %r8 ## literal pool for: "takes_three_floats(1,2,3) == 6"
## ...%edx = 0xf (line 15)...
0000000100000f11	movl	$0xf, %edx
## ...and then moved into the argument registers %rdi, %rsi, %rcx
0000000100000f16	movq	%rax, %rdi
0000000100000f19	movq	%rcx, %rsi
0000000100000f1c	movq	%r8, %rcx
0000000100000f1f	callq	0x100000f34 ## symbol stub for: ___assert_rtn
## the "values were equal" path lands here
0000000100000f24	leaq	0x6b(%rip), %rdi ## literal pool for: "All good."
0000000100000f2b	callq	0x100000f3a ## symbol stub for: _puts
## return 0
0000000100000f30	xorl	%eax, %eax
0000000100000f32	popq	%rbp
0000000100000f33	ret

## fun-O2.S
fun:
(__TEXT,__text) section

# float takes_a_vec3(float *v)
# In:  %rdi  = v
# Out: %xmm0 = v[0] + v[1] + v[2]
_takes_a_vec3:
## this one is instruction-for-instruction the same as the O1 version.
## no stack locals, just pure register goodness.
0000000100000ea0	pushq	%rbp
0000000100000ea1	movq	%rsp, %rbp
## %xmm0 = v[0]
0000000100000ea4	movss	(%rdi), %xmm0
## %xmm0 += v[1]
0000000100000ea8	addss	0x4(%rdi), %xmm0
## %xmm0 += v[2]
0000000100000ead	addss	0x8(%rdi), %xmm0
0000000100000eb2	popq	%rbp
0000000100000eb3	ret
## no-op after the ret; presumably padding up to the next function at ...ec0
0000000100000eb4	nopw	%cs:(%rax,%rax)

# float takes_three_floats(float x, float y, float z)
_takes_three_floats:
## okay, this one is fun. prologue
0000000100000ec0	pushq	%rbp
0000000100000ec1	movq	%rsp, %rbp
## by the calling convention %xmm0 is x, %xmm1 is y, %xmm2 is z
## %xmm2 += %xmm0. read naively with (1,2,3): z = z + x = 3.0 + 1.0 = 4.0
0000000100000ec4	addss	%xmm0, %xmm2
## %xmm2 += %xmm0 again. read naively: z = z + x = 4.0 + 1.0 = 5.0
0000000100000ec8	addss	%xmm0, %xmm2
## copy the sum into the return register: %xmm0 = %xmm2
0000000100000ecc	movaps	%xmm2, %xmm0
## return it
##
## ...
##
## Wait, WHAT?
## there is no call to takes_a_vec3 because it has been inlined.
## nothing can fault because no memory is read at all any more
## ..but read naively the result is 5.0 where it should be 6.0,
## and %xmm1 (y) is never touched.
## so I have no idea what it did here. (keep reading - the epilogue's LLVM IR
## shows these two additions as 'fadd ... undef')
0000000100000ecf	popq	%rbp
0000000100000ed0	ret
## no-op after the ret; presumably padding up to the next function at ...ee0
0000000100000ed1	nopw	%cs:(%rax,%rax)

# main
## but none of this matters! because main calls neither takes_a_vec3
## nor does it call takes_three_floats
_main:
## prologue
0000000100000ee0	pushq	%rbp
0000000100000ee1	movq	%rsp, %rbp
## add some FP constant to %xmm0. note this is an addss, not a load: main has not
## written %xmm0 before this point, so the sum depends on whatever it already held
0000000100000ee4	addss	0x78(%rip), %xmm0
## add %xmm0 to itself (effectively multiplying it by 2)
0000000100000eec	addss	%xmm0, %xmm0
## then compare it to some other constant
0000000100000ef0	ucomiss	0x71(%rip), %xmm0
## if not equal, jump to the ___assert_rtn stuff at ...efb
0000000100000ef7	jne	0x100000efb
## if equal and parity clear (not NaN), jump to all good at ...f23; otherwise fall through
0000000100000ef9	jnp	0x100000f23
## assert-failure path: load the three strings, %edx = 0xf (line 15),
## then move everything into the argument registers %rdi, %rsi, %rcx
0000000100000efb	leaq	0x6a(%rip), %rax ## literal pool for: "main"
0000000100000f02	leaq	0x68(%rip), %rcx ## literal pool for: "fun.c"
0000000100000f09	leaq	0x67(%rip), %r8 ## literal pool for: "takes_three_floats(1,2,3) == 6"
0000000100000f10	movl	$0xf, %edx
0000000100000f15	movq	%rax, %rdi
0000000100000f18	movq	%rcx, %rsi
0000000100000f1b	movq	%r8, %rcx
0000000100000f1e	callq	0x100000f34 ## symbol stub for: ___assert_rtn
## aaaaand we're good.
0000000100000f23	leaq	0x6c(%rip), %rdi ## literal pool for: "All good."
0000000100000f2a	callq	0x100000f3a ## symbol stub for: _puts
## return 0
0000000100000f2f	xorl	%eax, %eax
0000000100000f31	popq	%rbp
0000000100000f32	ret

## fun-Z_epilogue.md

      
    Raw
  

              fun-Z_epilogue.md
            
          
    Now, what was that mystery FP constant in the main function of the O2 version that made it work?
If we load lldb and break after the addss 0x78(%rip), %xmm0, then register read xmm0, we get:
(lldb) register read xmm0
    xmm0 = {0x00 0x00 0x40 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}

Now there's surely a way to display that as a float in lldb but since I didn't know any I made a quick C program to check the value of that:
#include <stdio.h>
#include <string.h>

/* Decodes the first four bytes of the dumped %xmm0 register as a float
 * and prints it. argc and argv are unused. */
int main (int argc, char ** argv) {
  /* The 16 register bytes, in the order lldb printed them. unsigned char so
   * that bytes >= 0x80 fit without an implementation-defined conversion. */
  const unsigned char vals[16] = { 0x00 , 0x00 , 0x40 , 0x40 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 };
  float a;

  (void) argc;
  (void) argv;

  /* memcpy instead of *(float *) vals: reading a char array through a float
   * pointer violates the aliasing rules and may be misaligned, which is
   * undefined behavior - exactly what this writeup warns about. */
  memcpy(&a, vals, sizeof a);
  printf("a = %f\n", a);
  return 0;
}
And sure enough the value was 3.0.
Doing that with LLDB

Later on, I found how to display those values directly in LLDB.
In fact, XMM registers are 128-bit wide - there's room for 4 single-precision, 32-bit floating point numbers in there.
We can ask lldb to read a register in 'float' format but it doesn't help us:
(lldb) register read xmm0 -f f
    xmm0 = 3.92929226918480722e-4942

Apparently it interprets xmm0 as one wide floating point value (128-bit? or perhaps 80-bit or 64-bit, not sure). In any case it should be 3, and instead we get roughly 3.9 × 10^-4942. So nope.
But we can also ask in 'float32' format and that'll give us four entries:
(lldb) register read xmm0 -f float32[]
    xmm0 = {3 0 0 0}

And there we have our 3 :) One instruction later, it's a 6. And it's compared with a constant that I don't know how to dump.
The mystery of the 5.0f

When asking clang to generate LLVM IR (Intermediate Representation) at -O2 optimization level, we get:
; Function Attrs: nounwind readnone ssp uwtable
define float @takes_three_floats(float %x, float %y, float %z) #1 {
  %1 = fadd float %z, undef
  %2 = fadd float %1, undef
  ret float %2
}
So... yep. It adds an undefined value to z, then another undefined value again. And yet in the assembly it chooses to do something in takes_three_floats (which yields 5.0f) and something else in main (which yields 6.0f and passes the assert).
The mystery remains!
The mystery of the 5.0f solved!

In fact it's not 5.0f at all!
Remember how in the O1 version, when it was calling takes_three_floats, it didn't even bother passing the first two arguments? That's what's happening in O2!
For main

Here's what LLVM thought.
// in main
assert(take_three_floats(1,2,3) == 6)

// alright, we would pass those on the stack... and then in take_three_floats we have
takes_a_vec3(&z)

// so we'd pass the address of the last one
// and then in takes_a_vec3 we'd do
return v[0]+v[1]+v[2];
So it'd rewrite that to:
// in main we could basically do
float x = 1.0;
float y = 2.0;
float z = 3.0;
assert(takes_a_vec3(&z) == 6);

// that'd be the same thing right?
And then it'd rewrite that to:
// well, in takes_a_vec3 we add'em all
// so we could do:
float x = 1.0;
float y = 2.0;
float z = 3.0;
assert((x + y + z) == 6);
Or just:
assert((int) (1.0 + 2.0 + 3.0) == 6);

And everybody knows 1.0 + 2.0 = 3.0, for sure, so we can do:
assert((3.0 + 3.0) == 6.0)

And that's what it does.
For takes_three_floats

Alright, so we have:
float takes_three_floats(float x, float y, float z) {
    // look ma, no hands!
    return takes_a_vec3(&z);
}
But it's stupid cause we only use z! So let's just pretend the first two arguments don't matter.
And we know what takes_a_vec3 does so we can just go ahead and:
float takes_three_floats(float x, float y, float z) {
    return x + y + z;
}
But wait, we said x and y don't matter, we can't use them. Well it just so happens that z = x + y in the only case we're ever called, so we can just do:
float takes_three_floats(float x, float y, float z) {
    // z is stored in %xmm2
    // %xmm0 and %xmm1 are not used at all.
    return z + z;
}
Further confusion

But wait, in our example it just so happened that 1 + 2 + 3 = 3 + 3 . Surely that is not the case everywhere, right?
What if we try with takes_three_floats(1,2,4) == 7 ?
$ ./fun
Assertion failed: (takes_three_floats(1,2,4) == 7), function main, file fun.c, line 15.
[1]    72759 abort      ./fun

After inspection, the result returned is 8. Same code in takes_three_floats.
How about takes_three_floats(1,32767,32768) == 65536 ?
$ ./fun
All good.

How about we add a fourth parameter? have a take_four_floats(1,2,3,4) == 10 ?
$ ./fun
Assertion failed: (takes_four_floats(1, 2, 3, 4) == 12), function main, file fun.c, line 15.
[1]    73583 abort      ./fun

After inspection, the result is 16, and the generated code is... :
        addss   %xmm0, %xmm3
        addss   %xmm0, %xmm3
        addss   %xmm0, %xmm3
        movaps  %xmm3, %xmm0
What the actual fuck, llvm.
What if we change the code inside what is now takes_a_vec4?
return v[0]; // yields movaps  %xmm3, %xmm0
return v[0] + v[1]; // yields one addss + movaps
return v[0] + v[1] + v[2]; // yields two addss + movaps
return v[0] + v[1] + v[2] + v[3]; // yields three addss + movaps

So what's inside takes_a_vec4 definitely matters.
TL;DR - undefined behavior is bad, kids, don't do it!
	#include <assert.h>
	#include <stdio.h>

	// Returns the sum v[0] + v[1] + v[2].
	// v must point to at least three consecutive, readable floats.
	float takes_a_vec3(float *v) {
	return v[0]+v[1]+v[2];
	}

	// Forwards to takes_a_vec3 with the address of the parameter z only;
	// x and y are never referenced.
	// NOTE: z is a single float, so takes_a_vec3's reads of v[1] and v[2] go past
	// the end of that object. This is undefined behavior, and it is deliberate:
	// it is the effect this writeup investigates. Do not "fix" it here.
	float takes_three_floats(float x, float y, float z) {
	// look ma, no hands!
	return takes_a_vec3(&z);
	}

	// Asserts that takes_three_floats(1,2,3) compares equal to 6, prints
	// "All good." when the assertion holds, and returns 0.
	// argc and argv are unused.
	int main(int argc, char **argv) {
	// passes. why? i know why. do you? :D
	assert(takes_three_floats(1,2,3) == 6);
	puts("All good.");

	return 0;
	}
	fun:
	(__TEXT,__text) section

	# float takes_a_vec3(float *v)
	# In:  %rdi  = v (pointer to the first of three floats)
	# Out: %xmm0 = v[0] + v[1] + v[2]
	_takes_a_vec3:
	## prologue - save the caller's %rbp and make %rbp our own frame pointer
	0000000100000e20 pushq %rbp
	0000000100000e21 movq %rsp, %rbp
	## store %rdi (our pointer argument v) in the stack slot at -0x8(%rbp)
	0000000100000e24 movq %rdi, -0x8(%rbp)
	## ..and load it straight back, we need it (typical non-optimized code)
	0000000100000e28 movq -0x8(%rbp), %rdi
	## load the first float pointed to by %rdi (v[0]) into %xmm0 (floating point register)
	0000000100000e2c movss (%rdi), %xmm0
	## reload v from its stack slot into %rdi - redundant, %rdi already holds it
	0000000100000e30 movq -0x8(%rbp), %rdi
	## add the second float, v[1] at offset 0x4, into %xmm0.
	## when called from the -O0 takes_three_floats, v[0] is z and v[1] is y, so this is 3.0 + 2.0
	0000000100000e34 addss 0x4(%rdi), %xmm0
	## another redundant reload of v
	0000000100000e39 movq -0x8(%rbp), %rdi
	## add the third float, v[2] at offset 0x8 (x in that same call, so 5.0 + 1.0)
	0000000100000e3d addss 0x8(%rdi), %xmm0
	## restore %rbp
	0000000100000e42 popq %rbp
	## and return. the result is in %xmm0, which is expected because the function
	## returns a float (and we return by register here)
	0000000100000e43 ret
	## multi-byte no-op. it sits after the ret, so it is not reached by falling through,
	## and it does not modify %rax (the operand is only an addressing form).
	## presumably padding so that the next function starts at the 16-byte boundary ...e50.
	0000000100000e44 nopw %cs:(%rax,%rax)

	# float takes_three_floats(float x, float y, float z)
	# In:  %xmm0 = x, %xmm1 = y, %xmm2 = z
	# Out: %xmm0 = whatever takes_a_vec3(&z) returned
	_takes_three_floats:
	## prologue - save %rbp
	0000000100000e50 pushq %rbp
	0000000100000e51 movq %rsp, %rbp
	## allocate 16 bytes to store our local variables in.
	## the stack grows down - we subtract from %rsp to allocate
	0000000100000e54 subq $0x10, %rsp
	## x will live at -0x4(%rbp), y at -0x8, z at -0xc
	## take the address of z's slot, put it in %rdi (the pointer argument for the call)
	0000000100000e58 leaq -0xc(%rbp), %rdi
	## spill the three arguments from %xmm0/%xmm1/%xmm2 into those stack slots.
	## z sits lowest, so 4 bytes above &z is y and 8 bytes above is x - which is
	## why reading three floats starting at &z happens to see z, y, x in this build.
	0000000100000e5c movss %xmm0, -0x4(%rbp)
	0000000100000e61 movss %xmm1, -0x8(%rbp)
	0000000100000e66 movss %xmm2, -0xc(%rbp)
	## call takes_a_vec3, which will only use %rdi.
	0000000100000e6b callq _takes_a_vec3
	## free memory, restore stack frame, return.
	## return value is in %xmm0, which was modified in takes_a_vec3.
	0000000100000e70 addq $0x10, %rsp
	0000000100000e74 popq %rbp
	0000000100000e75 ret
	## no-op after the ret; presumably padding up to the next function at ...e80
	0000000100000e76 nopw %cs:(%rax,%rax)

	# main - you should know your prototypes!
	# int main(int argc, char **argv): argc arrives in %edi, argv in %rsi.
	_main:
	## prologue, same as in the other functions
	0000000100000e80 pushq %rbp
	0000000100000e81 movq %rsp, %rbp
	## allocate 32 bytes of stack for locals
	0000000100000e84 subq $0x20, %rsp
	## the C source passes integer literals, so 'integer 1' is moved into the %rax register
	0000000100000e88 movabsq $0x1, %rax
	## and promptly converted to single-precision floating point, put into %xmm0 (x)
	0000000100000e92 cvtsi2ssq %rax, %xmm0
	## same goes for 'integer 2' (into %xmm1, y) and 'integer 3' (into %xmm2, z).
	0000000100000e97 movabsq $0x2, %rax
	0000000100000ea1 cvtsi2ssq %rax, %xmm1
	0000000100000ea6 movabsq $0x3, %rax
	0000000100000eb0 cvtsi2ssq %rax, %xmm2
	## zero the first stack slot, -0x4(%rbp). nothing in this listing reads it back;
	## presumably it is the slot the compiler reserves for main's return value.
	0000000100000eb5 movl $0x0, -0x4(%rbp)
	## save %edi (argc) and %rsi (argv) at -0x8(%rbp) and -0x10(%rbp)
	0000000100000ebc movl %edi, -0x8(%rbp)
	0000000100000ebf movq %rsi, -0x10(%rbp)
	## finally, call!
	0000000100000ec3 callq _takes_three_floats
	## at this point, we have the result in %xmm0
	## move 'integer 6' into %rax, convert it, store it in %xmm1
	0000000100000ec8 movabsq $0x6, %rax
	0000000100000ed2 cvtsi2ssq %rax, %xmm1
	## compare %xmm0 (function return value) and %xmm1 (6.0f constant)
	0000000100000ed7 ucomiss %xmm1, %xmm0
	## turn the flags ucomiss left behind into a 0/1 value in %rax:
	##   %cl = ZF          (set when equal - and also when unordered, i.e. a NaN is involved)
	##   %dl = !PF         (PF is set only when unordered)
	##   %dl = %cl & %dl   -> 1 only for "equal and not NaN"
	##   %dl ^= 1          -> 1 when the assertion FAILS
	## then widen to 64 bits: %rax is 0 if the two values were equal, 1 otherwise.
	0000000100000eda sete %cl
	0000000100000edd setnp %dl
	0000000100000ee0 andb %cl, %dl
	0000000100000ee2 xorb $0x1, %dl
	0000000100000ee5 andb $0x1, %dl
	0000000100000ee8 movzbl %dl, %edi
	0000000100000eeb movslq %edi, %rax
	## if %rax is 0...
	0000000100000eee cmpq $0x0, %rax
	## ...then skip the assert-failure call: jump to the jmpq at ...f19,
	## which continues to the puts call
	0000000100000ef4 je 0x100000f19
	## otherwise, prepare to call ___assert_rtn with info on
	## where the assert was that failed so the programmer knows:
	## %rdi = function name, %rsi = file name, %edx = 0xf (line 15), %rcx = expression text
	0000000100000efa leaq 0x6b(%rip), %rdi ## literal pool for: "main"
	0000000100000f01 leaq 0x69(%rip), %rsi ## literal pool for: "fun.c"
	0000000100000f08 movl $0xf, %edx
	0000000100000f0d leaq 0x63(%rip), %rcx ## literal pool for: "takes_three_floats(1,2,3) == 6"
	0000000100000f14 callq 0x100000f3a ## symbol stub for: ___assert_rtn
	0000000100000f19 jmpq 0x100000f1e
	## that's where we end up if they were equal
	0000000100000f1e leaq 0x71(%rip), %rdi ## literal pool for: "All good."
	0000000100000f25 callq 0x100000f40 ## symbol stub for: _puts
	## and we're good! phew
	## stash puts' return value (%eax) at -0x14(%rbp), then return 0 via %ecx -> %eax
	0000000100000f2a movl $0x0, %ecx
	0000000100000f2f movl %eax, -0x14(%rbp)
	0000000100000f32 movl %ecx, %eax
	## free the 32 bytes, restore %rbp, return
	0000000100000f34 addq $0x20, %rsp
	0000000100000f38 popq %rbp
	0000000100000f39 ret