indrora/asm.diff

## asm.diff
diff --git a/flag.s b/break.s
index b8ae696..aa174f5 100644
--- a/flag.s
+++ b/break.s
@@ -1,4 +1,4 @@
-       .file   "flag.c"
+       .file   "break.c"
        .section        .rodata
 .LC0:
        .string "start."
@@ -14,7 +14,6 @@ main:
        subl    $32, %esp
        movl    $0, 28(%esp)
        movl    $0, 24(%esp)
-       movl    $0, 20(%esp)
        movl    $.LC0, (%esp)
        call    puts
        movl    $0, 28(%esp)
@@ -24,15 +23,15 @@ main:
        jmp     .L3
 .L6:
        cmpl    $100, 24(%esp)
-       jne     .L4
-       movl    $1, 20(%esp)
+       je      .L9
 .L4:
        addl    $1, 24(%esp)
 .L3:
-       cmpl    $0, 20(%esp)
-       jne     .L5
        cmpl    $104, 24(%esp)
        jle     .L6
+       jmp     .L5
+.L9:
+       nop
 .L5:
        addl    $1, 28(%esp)
 .L2:

## asm.md

      
    Raw
  

              asm.md
            
          
    This is what the Assembly difference means:

they're different files (obviously)
in flag.c, an extra stack slot at 20(%esp) must be reserved and zeroed (movl $0, 20(%esp)) in order to hold the flag
in .L6, both versions compare our index at 24(%esp) against 100 (cmpl $100, 24(%esp)); in flag.c, we then have to do a jne .L4; movl $1, 20(%esp) to set the flag properly, while break.c just does a je .L9 to leave the loop.
in .L3 (condition segment of the for loop), we see that the following has to be added:
cmpl $0, 20(%esp)
jne .L5
This is to check that the flag is not set. It is not needed with a break.

The break-based code is much cleaner and faster (in terms of CPU cycles).
Let's imagine a hypothetical CPU:

cmp* operations take 2 cycles
mov* operations take 4 cycles
jne  operations take 2 cycles
jmp  operations take 1 cycle

Our example (break.c and flag.c) is almost a bad example, since a nop costs 0 cycles, all things considered. If there were something there instead, we'd still suffer a neat 8 cycles per iteration. Wow; a thousand iterations later, that's 8000 cycles. At 16MHz, that's a noticeable slowdown.
There's actually a measurable slowdown when you add a few printf() calls (to simulate I/O lag):
./break_test > /dev/null  0.88s user 0.01s system 71% cpu 1.245 total
./flag_test > /dev/null  0.92s user 0.02s system 68% cpu 1.351 total

that's at 1.6GHz.

  
## break.c
#include <stdio.h>
/*
 * Driver for the `break` variant of the early-exit comparison.
 *
 * Prints "start.", runs the inner counting loop 100000 times, then prints
 * "end.". Each inner loop leaves via `break` as soon as i reaches 100, so
 * the loop condition only ever tests the index.
 *
 * Returns 0 so the process has a well-defined exit status (the previous
 * `void main` signature left it unspecified).
 */
int main(void)
{
    int num_times = 0; // Number of counts in the round
    int i = 0;         // Inner loop index

    printf("start.\n");
    for (num_times = 0; num_times < 100000; num_times++)
    {
        for (i = 0; i < 105; i++)
        {
            // Early exit: jump straight out of the inner loop, no flag needed.
            if (i == 100) break;
        }
    }
    printf("end.\n");
    return 0;
}

## flag.c
#include <stdio.h>

/*
 * Driver for the `flag` variant of the early-exit comparison.
 *
 * Prints "start.", runs the inner counting loop 100000 times, then prints
 * "end.". Instead of `break`, the inner loop sets `flag` when i reaches 100
 * and the loop condition re-tests that flag on every pass.
 *
 * Returns 0 so the process has a well-defined exit status (the previous
 * `void main` signature left it unspecified).
 */
int main(void)
{
    int num_times = 0; // Number of counts in the round
    int i = 0;         // Inner loop index
    int flag = 0;      // Set to 1 to request termination of the inner loop

    printf("start.\n");
    for (num_times = 0; num_times < 100000; num_times++)
    {
        flag = 0; // Re-arm the flag for this round
        for (i = 0; !flag && i < 105; i++)
        {
            // Request exit; the loop condition notices it on the next test.
            if (i == 100) flag = 1;
        }
    }
    printf("end.\n");
    return 0;
}
	diff --git a/flag.s b/break.s
	index b8ae696..aa174f5 100644
	--- a/flag.s
	+++ b/break.s
	@@ -1,4 +1,4 @@
	- .file "flag.c"
	+ .file "break.c"
	.section .rodata
	.LC0:
	.string "start."
	@@ -14,7 +14,6 @@ main:
	subl $32, %esp
	movl $0, 28(%esp)
	movl $0, 24(%esp)
	- movl $0, 20(%esp)
	movl $.LC0, (%esp)
	call puts
	movl $0, 28(%esp)
	@@ -24,15 +23,15 @@ main:
	jmp .L3
	.L6:
	cmpl $100, 24(%esp)
	- jne .L4
	- movl $1, 20(%esp)
	+ je .L9
	.L4:
	addl $1, 24(%esp)
	.L3:
	- cmpl $0, 20(%esp)
	- jne .L5
	cmpl $104, 24(%esp)
	jle .L6
	+ jmp .L5
	+.L9:
	+ nop
	.L5:
	addl $1, 28(%esp)
	.L2:
	#include <stdio.h>
	/*
	 * Driver for the `break` variant of the early-exit comparison.
	 *
	 * Prints "start.", runs the inner counting loop 100000 times, then prints
	 * "end.". Each inner loop leaves via `break` as soon as i reaches 100, so
	 * the loop condition only ever tests the index.
	 *
	 * Returns 0 so the process has a well-defined exit status (the previous
	 * `void main` signature left it unspecified).
	 */
	int main(void)
	{
	    int num_times = 0; // Number of counts in the round
	    int i = 0;         // Inner loop index

	    printf("start.\n");
	    for (num_times = 0; num_times < 100000; num_times++)
	    {
	        for (i = 0; i < 105; i++)
	        {
	            // Early exit: jump straight out of the inner loop, no flag needed.
	            if (i == 100) break;
	        }
	    }
	    printf("end.\n");
	    return 0;
	}
	#include <stdio.h>

	/*
	 * Driver for the `flag` variant of the early-exit comparison.
	 *
	 * Prints "start.", runs the inner counting loop 100000 times, then prints
	 * "end.". Instead of `break`, the inner loop sets `flag` when i reaches 100
	 * and the loop condition re-tests that flag on every pass.
	 *
	 * Returns 0 so the process has a well-defined exit status (the previous
	 * `void main` signature left it unspecified).
	 */
	int main(void)
	{
	    int num_times = 0; // Number of counts in the round
	    int i = 0;         // Inner loop index
	    int flag = 0;      // Set to 1 to request termination of the inner loop

	    printf("start.\n");
	    for (num_times = 0; num_times < 100000; num_times++)
	    {
	        flag = 0; // Re-arm the flag for this round
	        for (i = 0; !flag && i < 105; i++)
	        {
	            // Request exit; the loop condition notices it on the next test.
	            if (i == 100) flag = 1;
	        }
	    }
	    printf("end.\n");
	    return 0;
	}