pb-uk/c-increment

## c-increment
This is part of a conversation resulting from a
[Post on PhysicsForums](https://www.physicsforums.com/threads/trying-to-decide-which-programming-language-i-want-to-learn.991177/post-6378518).

## Source code (simple Fibonacci sequence)
```cpp
#include <iostream>
using namespace std;

// Stores Fibonacci terms in `a` until the first term that is >= 1000 has been
// stored, then prints that term.
int main() {
  int a[100] = {0, 1};  // f_0 and f_1; the remaining elements are zero-initialised.
  int k = 2;            // Index at which the next term will be stored.
  int f_k_minus_1 = 1;  // f_1, matching a[1].
  int f_k_minus_2 = 0;  // f_0, matching a[0], so the first computed term is f_2 = 1.
  int f_k = 0;          // Must be initialised: the loop condition reads it before
                        // the first assignment inside the loop body.
  while (f_k < 1000) {
    f_k = f_k_minus_1 + f_k_minus_2;
    f_k_minus_2 = f_k_minus_1;
    f_k_minus_1 = f_k;
    // Either (bad):
    a[k++] = f_k;
    // Or (good):
    // a[k] = f_k;
    // k++;
  }
  // k was incremented after the last store, so the last term is at k - 1.
  std::cout << a[k - 1];
  return 0;
}
```

## Compiled using `g++ -S` (i.e. no optimisation) g++ version 7.5.0 on x86_64

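Presumably because it is trying to make the object code easier to debug via a core dump, the 'bad' code results in an additional instruction: `k + 1` is computed in a register and then saved to memory by a separate `movl`, whereas the 'good' code increments `k` in memory with a single `addl`.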

### Using `a[k++]`
```asm
        movl    -432(%rbp), %eax        ; Load k into ax.
        leal    1(%rax), %edx           ; Load k + 1 into dx.
        movl    %edx, -432(%rbp)        ; Save dx into k.
        cltq
        movl    -420(%rbp), %edx        ; Load f_k into dx.
        movl    %edx, -416(%rbp,%rax,4) ; Save dx into a[ax] (the old k).
```

### Using `a[k]; k++`
```asm
        movl    -432(%rbp), %eax        ; Load k into ax.
        cltq
        movl    -420(%rbp), %edx        ; Load f_k into dx.
        movl    %edx, -416(%rbp,%rax,4) ; Save dx into a[ax] (we haven't incremented k yet).
        addl    $1, -432(%rbp)          ; Add 1 to k;
```

## Compiled using `g++ -S -O` g++ version 7.5.0 on x86_64

Now that the compiler is not worried about an informative core dump, it uses registers for all intermediate values and comes up with almost the same object code for both the 'good' and 'bad' sources. Separating the increment from the array indexing still saves 1 instruction, although only on the first iteration of the loop.

### Using `a[k++]`
```asm
.L3:
        leal    (%rsi,%rcx), %edx
        movl    %eax, %edi
        movl    %edx, -4(%rsp,%rax,4)
        addq    $1, %rax
        movl    %ecx, %esi
        movl    %edx, %ecx
        cmpl    $999, %edx
        jle     .L3
```

### Using `a[k]; k++;`
```asm
        jmp     .L3
.L6:
        movl    %edx, %ecx
.L3:
        leal    (%rcx,%rsi), %edx
        movl    %edx, -4(%rsp,%rax,4)
        movl    %eax, %edi
        addq    $1, %rax
        movl    %ecx, %esi
        cmpl    $999, %edx
        jle     .L6
```

Conclusion: `a[i++];` may be quicker to type but is harder to maintain and may well run slower than `a[i]; i++;`.
	This is part of a conversation resulting from a
	[Post on PhysicsForums](https://www.physicsforums.com/threads/trying-to-decide-which-programming-language-i-want-to-learn.991177/post-6378518).

	## Source code (simple Fibonacci sequence)
	```cpp
	#include <iostream>
	using namespace std;

	// Stores Fibonacci terms in `a` until the first term that is >= 1000 has been
	// stored, then prints that term.
	int main() {
	  int a[100] = {0, 1};  // f_0 and f_1; the remaining elements are zero-initialised.
	  int k = 2;            // Index at which the next term will be stored.
	  int f_k_minus_1 = 1;  // f_1, matching a[1].
	  int f_k_minus_2 = 0;  // f_0, matching a[0], so the first computed term is f_2 = 1.
	  int f_k = 0;          // Must be initialised: the loop condition reads it before
	                        // the first assignment inside the loop body.
	  while (f_k < 1000) {
	    f_k = f_k_minus_1 + f_k_minus_2;
	    f_k_minus_2 = f_k_minus_1;
	    f_k_minus_1 = f_k;
	    // Either (bad):
	    a[k++] = f_k;
	    // Or (good):
	    // a[k] = f_k;
	    // k++;
	  }
	  // k was incremented after the last store, so the last term is at k - 1.
	  std::cout << a[k - 1];
	  return 0;
	}
	```

	## Compiled using `g++ -S` (i.e. no optimisation) g++ version 7.5.0 on x86_64

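	Presumably because it is trying to make the object code easier to debug via a core dump, the 'bad' code results in an additional instruction: `k + 1` is computed in a register and then saved to memory by a separate `movl`, whereas the 'good' code increments `k` in memory with a single `addl`.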

	### Using `a[k++]`
	```asm
	movl -432(%rbp), %eax ; Load k into ax.
	leal 1(%rax), %edx ; Load k + 1 into dx.
	movl %edx, -432(%rbp) ; Save dx into k.
	cltq
	movl -420(%rbp), %edx ; Load f_k into dx.
	movl %edx, -416(%rbp,%rax,4) ; Save dx into a[ax] (the old k).
	```

	### Using `a[k]; k++`
	```asm
	movl -432(%rbp), %eax ; Load k into ax.
	cltq
	movl -420(%rbp), %edx ; Load f_k into dx.
	movl %edx, -416(%rbp,%rax,4) ; Save dx into a[ax] (we haven't incremented k yet).
	addl $1, -432(%rbp) ; Add 1 to k;
	```

	## Compiled using `g++ -S -O` g++ version 7.5.0 on x86_64

	Now that the compiler is not worried about an informative core dump, it uses registers for all intermediate values and comes up with almost the same object code for both the 'good' and 'bad' sources. Separating the increment from the array indexing still saves 1 instruction, although only on the first iteration of the loop.

	### Using `a[k++]`
	```asm
	.L3:
	leal (%rsi,%rcx), %edx
	movl %eax, %edi
	movl %edx, -4(%rsp,%rax,4)
	addq $1, %rax
	movl %ecx, %esi
	movl %edx, %ecx
	cmpl $999, %edx
	jle .L3
	```

	### Using `a[k]; k++;`
	```asm
	jmp .L3
	.L6:
	movl %edx, %ecx
	.L3:
	leal (%rcx,%rsi), %edx
	movl %edx, -4(%rsp,%rax,4)
	movl %eax, %edi
	addq $1, %rax
	movl %ecx, %esi
	cmpl $999, %edx
	jle .L6
	```

	Conclusion: `a[i++];` may be quicker to type but is harder to maintain and may well run slower than `a[i]; i++;`.