Skip to content

Instantly share code, notes, and snippets.

@pitr
Forked from HeinrichHartmann/Makefile
Last active May 2, 2021 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pitr/d196e2868e192af840d0075770b3c513 to your computer and use it in GitHub Desktop.
Save pitr/d196e2868e192af840d0075770b3c513 to your computer and use it in GitHub Desktop.
Search Sum Exercise in x86-Assembly
;; Arguments:
;;
;; rdi = int* arr
;; rsi = int len
;; edx = int sum
;; rcx = int *r
;; r8 = int *l
;;
;; Alias Register Names
%define arr_p rdi
%define r esi
%define l r9d
%define sum edx
GLOBAL _search_sum_asm
_search_sum_asm:
mov l, 0
mov dword [rcx], 0
mov dword [r8], 0
dec r
.loop:
cmp l, r
jge .not_found
movsxd r10, l
movsxd rax, r
mov eax, [arr_p + 4*rax]
add eax, [arr_p + 4*r10]
cmp eax, sum
jl .lower
jg .greater
je .found
.lower:
inc l
jmp .loop
.greater:
dec r
jmp .loop
.not_found:
mov rax, 0
ret
.found:
mov rax, 1
mov [rcx], r
mov [r8], l
ret
GLOBAL _search_sum_asm_bit
%define t r10d
%define ll r11d
%define rr r15d
_search_sum_asm_bit:
cmp r, 2
jl .not_found
dec r
mov l, 0
mov rr, 0
.loop:
movsxd r10, l
movsxd rax, r
mov eax, [arr_p + 4*rax]
add eax, [arr_p + 4*r10]
sub eax, sum
xor t, t
sub t, eax
shr t, 31
xor t, 1 ;; t=1 if diff <= 0
shr eax, 31
mov ll, l
add l, eax ;; l++ if diff < 0
xor eax, 1 ;; eax=1 if diff >= 0
mov rr, r
sub r, eax ;; r-- if diff >= 0
and t, eax ;; t=1 if diff=0 (<=0 && >=0) ie. found
xor t, 1
sub t, 1 ;; t=ff.. if found, else 0
and t, r ;; t=r if found, else 0
sub r, t ;; r=0 if found, ie. break loop
cmp l, r
jl .loop
cmp rr, 0
je .not_found
.found:
mov eax, 1
mov [rcx], rr
mov [r8], ll
ret
.not_found:
mov eax, 0
ret
/*
* Task:
*
* Given an sorted array of integers e.g. a = [ 1, 2, 5, 7, 11 ] and an integer e.g. sum = 8.
* Find distinct indices x,y so that a[x] + a[y] = sum, if this is possible.
*
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
/* array length */
#define LEN(a) sizeof(a) / sizeof(a[0])
/*
* Arguments:
* - int *arr -- pointer to array
* - int arr_len -- length of array
* - int sum -- sum
* - int *x,*y -- place to put solution
*
* Return: 1 if match is found, 0 otherwise.
*/
int search_sum_c(int *arr, int arr_len, int sum, int *x, int *y) {
int l = 0, r = arr_len - 1;
while (l < r) {
int s = arr[l] + arr[r];
if (s < sum) l++;
else if (s > sum) r--;
else if (s == sum) {
*x = r;
*y = l;
return 1;
}
};
return 0;
};
typedef int search_sum_func(int*, int, int, int*, int*);
/* Declare asm implementation that will be linked */
extern search_sum_func search_sum_asm;
extern search_sum_func search_sum_asm_bit;
void test_search_sum(int *arr, int len, int sum, int expected, search_sum_func f) {
int r,l;
clock_t begin = clock();
int ok = f(arr, len, sum, &r, &l);
printf("[%.0fms] ", (double)(clock() - begin) * 1000 / CLOCKS_PER_SEC);
if (len < 100) {
printf("A = [ ");
for (int i = 0; i < len; i++) {
printf("%d ", arr[i]);
}
printf("\b ] ");
}
printf("sum = %d; ", sum);
if (!ok) {
printf("No match found => ");
if (!expected) {
printf("OK\n");
} else {
printf("FAILED\n");
}
} else {
printf("Found @ (%d,%d) => ", l, r);
if (arr[r] + arr[l] == sum && expected) {
printf("OK\n");
} else {
printf("FAILED\n");
}
}
}
void pertest_search_sum() {
srand(100);
int len = 50<<20;
int *arr = malloc(len*sizeof(int)); // 200MB
// random, sorted, even numbers, 1 number in the middle is odd
for (int i = 0; i < len;i++) arr[i] = 4*i + (i == len/2 ? 1 : (rand()%3)<<1);
// sum is 2 numbers in the middle (one of which is odd)
int sum = arr[len/2] + arr[len/2+1];
printf("## perf search_sum_c\n");
test_search_sum(arr, len, sum, 1, search_sum_c);
test_search_sum(arr, len, sum, 1, search_sum_c);
test_search_sum(arr, len, sum, 1, search_sum_c);
printf("## perf search_sum_asm\n");
test_search_sum(arr, len, sum, 1, search_sum_asm);
test_search_sum(arr, len, sum, 1, search_sum_asm);
test_search_sum(arr, len, sum, 1, search_sum_asm);
printf("## perf search_sum_asm_bit\n");
test_search_sum(arr, len, sum, 1, search_sum_asm_bit);
test_search_sum(arr, len, sum, 1, search_sum_asm_bit);
test_search_sum(arr, len, sum, 1, search_sum_asm_bit);
}
void test(search_sum_func f) {
int arr[] = {1, 2, 6, 7, 9, 12};
int sum = 8;
test_search_sum(arr, LEN(arr), sum, 1, f);
int arr2[] = {1, 7};
int sum2 = 8;
test_search_sum(arr2, LEN(arr2), sum2, 1, f);
int arr3[] = {};
int sum3 = 8;
test_search_sum(arr3, LEN(arr3), sum3, 0, f);
int arr4[] = {1, 2, 3};
int sum4 = 0;
test_search_sum(arr4, LEN(arr4), sum4, 0, f);
int arr5[] = {4};
int sum5 = 8;
test_search_sum(arr5, LEN(arr5), sum5, 0, f);
int arr6[] = {4, 4};
int sum6 = 8;
test_search_sum(arr6, LEN(arr6), sum6, 1, f);
}
int main() {
printf("## test search_sum_c\n");
test(search_sum_c);
printf("## test search_sum_asm\n");
test(search_sum_asm);
printf("## test search_sum_asm_bit\n");
test(search_sum_asm_bit);
pertest_search_sum();
return 0;
}
main: main.c lib.asm
nasm -f macho64 lib.asm
$(CC) -O3 -o main lib.o main.c
run: main
./main
./main
## test search_sum_c
[0ms] A = [ 1 2 6 7 9 12 ] sum = 8; Found @ (0,3) => OK
[0ms] A = [ 1 7 ] sum = 8; Found @ (0,1) => OK
[0ms] A = [ ] sum = 8; No match found => OK
[0ms] A = [ 1 2 3 ] sum = 0; No match found => OK
[0ms] A = [ 4 ] sum = 8; No match found => OK
[0ms] A = [ 4 4 ] sum = 8; Found @ (0,1) => OK
## test search_sum_asm
[0ms] A = [ 1 2 6 7 9 12 ] sum = 8; Found @ (0,3) => OK
[0ms] A = [ 1 7 ] sum = 8; Found @ (0,1) => OK
[0ms] A = [ ] sum = 8; No match found => OK
[0ms] A = [ 1 2 3 ] sum = 0; No match found => OK
[0ms] A = [ 4 ] sum = 8; No match found => OK
[0ms] A = [ 4 4 ] sum = 8; Found @ (0,1) => OK
## test search_sum_asm_bit
[0ms] A = [ 1 2 6 7 9 12 ] sum = 8; Found @ (0,3) => OK
[0ms] A = [ 1 7 ] sum = 8; Found @ (0,1) => OK
[0ms] A = [ ] sum = 8; No match found => OK
[0ms] A = [ 1 2 3 ] sum = 0; No match found => OK
[0ms] A = [ 4 ] sum = 8; No match found => OK
[0ms] A = [ 4 4 ] sum = 8; Found @ (0,1) => OK
## perf search_sum_c
[161ms] sum = 209715207; Found @ (26214400,26214401) => OK
[162ms] sum = 209715207; Found @ (26214400,26214401) => OK
[156ms] sum = 209715207; Found @ (26214400,26214401) => OK
## perf search_sum_asm
[166ms] sum = 209715207; Found @ (26214400,26214401) => OK
[161ms] sum = 209715207; Found @ (26214400,26214401) => OK
[167ms] sum = 209715207; Found @ (26214400,26214401) => OK
## perf search_sum_asm_bit
[247ms] sum = 209715207; Found @ (26214400,26214401) => OK
[238ms] sum = 209715207; Found @ (26214400,26214401) => OK
[240ms] sum = 209715207; Found @ (26214400,26214401) => OK
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment