Skip to content

Instantly share code, notes, and snippets.

@gurchik
Last active September 14, 2018 12:34
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gurchik/9dff75a67710207e16cd6a8531393ccf to your computer and use it in GitHub Desktop.
Save gurchik/9dff75a67710207e16cd6a8531393ccf to your computer and use it in GitHub Desktop.
A heavily-documented RPN calculator written in pure x86 assembly
; Allow the linker to find the _start symbol. The linker will begin program execution
; there.
global _start
; Start the .data section of the executable, which stores constants (read-only data)
; It doesn't matter which order your sections are in, I just like putting .data first
section .rodata
; Declare some bytes at a symbol called error_msg. NASM's db pseudo-instruction
; allows either a single byte value, a constant string, or a combination of the two
; as seen here. 0xA = new line, and 0x0 = string-terminating null
error_msg: db "Invalid input", 0xA, 0x0
; Start the .data section, which stores variable data
section .data
stack_size: dd 0 ; create a dword (4-byte) variable set to zero
stack: times 256 dd 0 ; fill the stack with 256 dword zeroes
; Start the .text section, which stores program code
_start:
; you do not get the arguments of _start the same way you do in other functions.
; instead, esp points directly to argc (the number of arguments), and esp+4 points
; to argv. Therefore, esp+4 points to the name of your program, esp+8 points to
; the first argument, etc
mov esi, [esp+8] ; esi = "input" = argv[0]
; call _strlen to find the length of the input
push esi
call _strlen
mov ebx, eax ; ebx = input_length
add esp, 4
; end _strlen call
mov ecx, 0 ; ecx = "i"
_main_loop_start:
cmp ecx, ebx ; if (i >= input_length)
jge _main_loop_end
mov edx, 0
mov dl, [esi + ecx] ; load only a byte from memory into the lower byte of
; edx. We set the rest of edx to zero.
; edx = c variable = input[i]
cmp edx, '0'
jl _check_operator
cmp edx, '9'
jg _print_error
sub edx, '0'
mov eax, edx ; eax = c variable - '0' (the numeric digit, not char)
jmp _push_eax_and_continue
_check_operator:
; call _pop twice to pop b into edi and a into eax
push ecx
push ebx
call _pop
mov edi, eax ; edi = b
call _pop ; eax = a
pop ebx
pop ecx
; end call _pop
cmp edx, '+'
jne _subtract
add eax, edi ; eax = a+b
jmp _push_eax_and_continue
_subtract:
cmp edx, '-'
jne _multiply
sub eax, edi ; eax = a-b
jmp _push_eax_and_continue
_multiply:
cmp edx, '*'
jne _divide
imul eax, edi ; eax = a*b
jmp _push_eax_and_continue
_divide:
cmp edx, '/'
jne _print_error
push edx ; save edx since we'll need to set it to 0 for idiv
mov edx, 0
idiv edi ; eax = a/b
pop edx
; now we push eax and continue
_push_eax_and_continue:
; call _push
push eax
push ecx
push edx
push eax ; arg1
call _push
add esp, 4
pop edx
pop ecx
pop eax
; end call _push
inc ecx
jmp _main_loop_start
_main_loop_end:
cmp byte [stack_size], 1 ; if (stack_size != 1) print error
jne _print_error
mov eax, [stack]
push eax
call _print_answer
; print a final newline
push 0xA
call _putc
; exit successfully
mov eax, 0x01 ; 0x01 = exit()
mov ebx, 0 ; 0 = no errors
int 0x80 ; execution will end here
_print_error:
push error_msg
call _print_msg
mov eax, 0x01
mov ebx, 1
int 0x80
%define MAX_DIGITS 10
_print_answer:
enter 1, 0 ; we'll use 1 byte for "started" variable in C
push ebx
push edi
push esi
mov eax, [ebp+8] ; our "a" argument
cmp eax, 0 ; if the number is not negative, skip this if-statement
jge _print_answer_negate_end
; call putc for '-'
push eax
push 0x2d ; '-' character
call _putc
add esp, 4
pop eax
neg eax ; convert a to positive
_print_answer_negate_end:
mov byte [ebp-4], 0 ; started = 0
mov ecx, MAX_DIGITS ; our i variable
_print_answer_loop_start:
cmp ecx, 0
je _print_answer_loop_end
; call pow_10 for ecx. We'll be trying to get ebx to be out "digit" variable in C.
; For now we'll get edx = pow_10(i-1) and ebx = pow_10(i)
push eax
push ecx
dec ecx ; i-1
push ecx ; arg1 for _pow_10
call _pow_10
mov edx, eax ; edx = pow_10(i-1)
add esp, 4
pop ecx ; restore ecx to i
pop eax
; end pow_10 call
mov ebx, edx ; digit = ebx = pow_10(i-1)
imul ebx, 10 ; digit = ebx = pow_10(i)
; call _mod for (a % pow_10(i)), which is (eax mod ebx)
push eax
push ecx
push edx
push ebx ; arg2, ebx = digit = pow_10(i)
push eax ; arg1, eax = a
call _mod
mov ebx, eax ; digit = ebx = a % pow_10(i+1), almost there
add esp, 8
pop edx
pop ecx
pop eax
; end mod call
; divide ebx ("digit" var) by pow_10(i) (edx). We'll need to save a few registers
; since idiv requires both edx and eax for the dividend. Since edx is our divisor,
; we'll need to move it to some other register
push esi
mov esi, edx
push eax
mov eax, ebx
mov edx, 0
idiv esi ; eax holds the result (the digit)
mov ebx, eax ; ebx = (a % pow_10(i)) / pow_10(i-1), the "digit" variable in C
pop eax
pop esi
; end division
cmp ebx, 0 ; if digit == 0
jne _print_answer_trailing_zeroes_check_end
cmp byte [ebp-4], 0 ; if started == 0
jne _print_answer_trailing_zeroes_check_end
jmp _print_answer_loop_continue ; continue
_print_answer_trailing_zeroes_check_end:
mov byte [ebp-4], 1 ; started = 1
add ebx, 0x30 ; digit + '0'
; call putc
push eax
push ecx
push edx
push ebx
call _putc
add esp, 4
pop edx
pop ecx
pop eax
; end call putc
_print_answer_loop_continue:
sub ecx, 1
jmp _print_answer_loop_start
_print_answer_loop_end:
pop esi
pop edi
pop ebx
leave
ret
_pow_10:
enter 0, 0
mov ecx, [ebp+8] ; set ecx (caller-saved) to function arg
mov eax, 1 ; first power of 10 (10**0 = 1)
_pow_10_loop_start:
cmp ecx, 0
je _pow_10_loop_end
imul eax, 10
sub ecx, 1
jmp _pow_10_loop_start
_pow_10_loop_end:
leave
ret
_mod:
enter 0, 0
push ebx
mov edx, 0 ; explained below
mov eax, [ebp+8]
mov ebx, [ebp+12]
idiv ebx ; divides the 64-bit integer [edx:eax] by ebx. We only want to divide the
; 32-bit integer eax, so we set edx to zero. The result of this division
; is stored in eax, and the remainder is stored in edx
mov eax, edx ; return the modulus
pop ebx
leave
ret
_putc:
enter 0, 0
mov eax, 0x04 ; write()
mov ebx, 1 ; standard out
lea ecx, [ebp+8] ; the input character
mov edx, 1 ; print only 1 character
int 0x80
leave
ret
_push:
enter 0, 0
; Save the callee-saved registers that I'll be using
push eax
push edx
mov eax, [stack_size]
mov edx, [ebp+8]
mov [stack + 4*eax], edx ; Insert the arg into the stack. We scale
; by 4 because each dword is 4 bytes each
inc dword [stack_size] ; Add 1 to stack_size
; Restore the callee-saved registers we used
pop edx
pop eax
leave
ret
_pop:
enter 0, 0
; Save the callee-saved registers
dec dword [stack_size] ; Subtract 1 from stack_size first
mov eax, [stack_size]
mov eax, [stack + 4*eax] ; Set the number at the top of the stack to eax
; Here I'd restore the callee-saved registers, but I didn't save any
leave
ret
_print_msg:
enter 0, 0
; My function begins here
mov eax, 0x04 ; 0x04 = the write() syscall
mov ebx, 0x1 ; 0x1 = standard output
mov ecx, [ebp+8] ; the string we want to print is the first argument of this function
; at this point we wish to set edx to the length of the string. time to call _strlen
push eax ; save the caller-saved registers (I choose to not save edx)
push ecx
push dword [ebp+8] ; push _strlen's argument, the string argument to _print_msg. NASM
; complains if you do not put a size directive here, and I'm not sure
; why. Anyway, a pointer is a dword (4 bytes)
call _strlen ; eax is now equal to the length of the string
mov edx, eax ; move the length into edx where we wanted it
add esp, 4 ; remove 4 bytes from the stack (one 4-byte char* argument)
pop ecx ; restore caller-saved registers
pop eax
; we're done calling _strlen and setting up the syscall
int 0x80
leave
ret
_strlen:
enter 0, 0 ; save the previous frame's base pointer and adjust ebp
; Here I'd save the callee-saved registers, but I won't be modifying any
; My function begins here
mov eax, 0 ; length = 0
mov ecx, [ebp+8] ; copy the function's first argument (pointer to the first character
; of the string) into ecx (which is caller-saved, so no need to save it)
_strlen_loop_start:
cmp byte [ecx], 0 ; dereference that pointer and compare it to null. Here we have to
; explicitly mention it's a byte since the size of the pointer is
; ambiguous (is it a 4 byte integer? 2? 1?). This is called called a
; Size Directive
je _strlen_loop_end ; jump out of the loop if it is equal to null
add eax, 1 ; add 1 to our return value
add ecx, 1 ; increment to the next character in the string
jmp _strlen_loop_start ; jump back to the start
_strlen_loop_end:
; My function ends here. eax is equal to my function's return value
; Here I'd restore the callee-saved registers, but I didn't save any
leave ; deallocate and restore the previous frame's base pointer
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment