sabahtalateh/x86_asm.sh

## x86_asm.sh
# Some assembler instructions
#
# x86 - Little endian architecture, bytes stores in reversed order
#
# Move (MOV)
# @@@@@@@@@@
#
# mov dst, src  - Copy data from source to destination
# mov eax, 8CBh - Put the number 8CBh into the 32-bit eax register; the trailing h marks the literal as hexadecimal (base 16).
# mov ecx, ebx  - Copy 32-bit number from ebx to ecx.
# mov si, cx    - Copy a number from cx to si, 16-bit copy.
#
# Source and Destination must have the same size
# mov ecx, dh   - Invalid instruction ecx - 32-bits size, dh - 8-bits size.
#
# Addition (ADD)
# @@@@@@@@@@@@@@
# add dst, src    - dst <- dst + src, dest will change, source will stay the same.
# The result of adding will be truncated in case of overflow
# add eax, edx    - eax <- (eax + edx) mod 2^32
# add eax, 11b    - eax <- (eax + 11b) mod 2^32, 11b - b - binary (base 2), 11b = 3
#
# ! Source and Destination should be compatible by size
#
# Subtraction (SUB)
# sub dst, src    - dst <- dst - src or dst <- dst + (-src), negation of source is done with the two's complement method.
# sub eax, edx    - eax <- eax - edx
# sub esi, 4h     - esi <- esi - 4h, h for hexadecimal.
#
# Increment (INC) and Decrement (DEC)
# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# inc eax         - eax <- (eax + 1) mod 2^32
# dec si          - si <- (si - 1) mod 2^16
#
# Multiplication (MUL)
# MUL arg         - Unsigned multiplication
# Some forms:
# 1. ax <- al * arg       - if argument is of size 8 bits.
# 2. dx:ax <- ax * arg    - if argument is of size 16 bits.
# 3. edx:eax <- eax * arg - if argument is of size 32 bits.
#
# edx:eax means bits concatenation in both registers
# The size of the result is larger than the argument size (twice the amount of bits).
#
# mul ecx         - edx:eax <- eax * ecx
# mul si          - dx:ax <- ax * si
# mul al          - ax <- al * al
#
# MUL accepts only a register or a memory operand as its argument, never an immediate value
# mul 2Ch         - invalid
#
# Division (DIV)
# Unsigned division
# Some forms:
# 1. If arg is 8 bits size
#    al <- ax / arg       - Quotient
#    ah <- ax % arg       - Remainder
# 2. If arg is 16 bits size
#    ax <- dx:ax / arg
#    dx <- dx:ax % arg
# 3. If arg is 32 bits size
#    eax <- edx:eax / arg
#    edx <- edx:eax % arg
# div ch          - ch - 8 bits size, al <- ax / ch, ah <- ax % ch
# div esi         - esi - 32 bits size, eax <- edx:eax / esi, edx <- edx:eax % esi
# div di          - di - 16 bits size, ax <- dx:ax / di, dx <- dx:ax % di
# div 5Ah         - invalid example
# Exception will raise in case of division by zero and quotient overflow
#
# Instruction Pointer
# EIP - Extended Instruction Pointer (RIP in 64 bits)
# If we want to execute code from different location we should change the Instruction Pointer,
#  it contains the address of the current instruction.
# In protected mode it could not be changed directly (mov eip,eax - invalid)
#
# Jump (JMP) allows to set a value of EIP
# Jump will continue program from given address
#
# jmp ecx         - change eip value to what in ecx, continue program from the address in ecx
# jmp 777d1044h   - eip <- 0x777d1044
# jmp my_label    - jump to label, labels are special marks in code, it ends with : symbol
#
# Labels in code will be translated into the actual memory addresses on the assembling stage
# While the program is running, eip always points to the current instruction address
#
# Jump can be relative or absolute
# Relative means set eip to current location plus some bytes
# Absolute means jump to some specific address
# The assembler chooses which form to use by itself; it often picks the relative one because its encoding is shorter
#
#
# Flags Register
# Used to make branches and decisions in code
# Some specific flags inside the flags register
#
# Zero flag
# Sign flag
# Carry flag
# Overflow flag
#
# No direct access to this register, every bit in the register is a flag that represents True or False
# This is how it looks like https://ru.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D0%B8%D1%81%D1%82%D1%80_%D1%84%D0%BB%D0%B0%D0%B3%D0%BE%D0%B2
#
# Zero flag is set to 1 when the last calculation had the zero result
#  it cleared (set to 0) whenever the last calculation had non zero result
# mov eax, 3h
# mov ecx, 3h
# sub eax, ecx
# Zero flag will be set to 1
#
# mov eax, 3h
# mov ecx, 3h
# add eax, ecx
# After the add the Zero flag will be set to 0
#
# Sign flag equals the most significant bit of the last result
# 0 if result is positive in 2's complement form
# 1 if result is negative
# mov edx, 0
# dec edx
# Sign flag will be set to 1, result is negative
#
# mov edx, 0
# inc edx
# Sign flag will be set to 0, result is positive
#
# Carry flag
# It understands unsigned addition and subtraction,
#  it is set if the addition of two numbers causes a carry out of the most significant bit (leftmost bit)
# mov eax, 0ffffffffh
# add eax, 1
# Carry flag is set to 1 (note: inc would not do this, INC and DEC leave the Carry flag unchanged)
# Tells you that the result of adding two unsigned numbers is wrong
#
# It also set if the subtraction requires a borrow into the most significant bit
# mov ecx, 0
# mov edx, 11b
# sub ecx, edx
# if there are no bits to borrow from, we borrow from an imaginary bit (the 1 to the left of ecx)
# ecx  1 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 |
# edx  - | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 1 1 |
#        | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 0 1 |
# Carry flag is set to 1
#
# Overflow flag
# Understands signed addition and subtraction
# Addition:
#  Set if the addition of two positive numbers has a negative result
#  Set if the addition of two negative numbers has a positive result
# Subtraction:
#  Set if "+" - "-" = "-"
#  Set if "-" - "+" = "+"
# Tells you if there is something wrong with your signed addition or subtraction
# "+" + "-", "+" - "+", "-" - "-" can not set the overflow flag because the result can be
#  both positive or negative
# To clear the overflow flag you can add positive and negative numbers
#
# mov al, 7fh
# mov cl, 1h
# add al, cl
#     | 7 | f |
# al   01111111 - positive, leading 0
#     | 0 | 1 |
# cl   00000001 - positive, leading 0
# res  10000000 - negative in 2's complement form - 0x80
# Overflow flag is set "+" + "+" = "-"
#
# If work with unsigned numbers look at carry flag
# If work with signed numbers look at overflow flag
#
# Basic conditional branching
# Jumping according to flags
# JMP - Unconditional jump
# J<xx> - xx - some condition - conditional jump
# JZ - jump if zero flag is set - 1
# JNZ - jump if zero flag is not set - 0
# JS/JNS - Check sign flag
# JC/JNC - Check carry flag
# JO/JNO - Check overflow flag
# Jump instruction it is indirect way to read the flags register
#
# To compare 2 unsigned numbers you can do something like this
# mov     eax,    99h
# mov     ebx,    100h
# sub     eax,    ebx
# jc      a_lt_b
# jnc     b_lt_a
# Via the subtraction the carry flag will be set in case the first number is less than the second
# But in this case the value of eax will be overwritten, to prevent this we could use CMP
#  CMP works like SUB but it doesn't store the result, it just affects the flags
# To compare unsigned numbers use stuff like this
# cmp a, b
# jb        - jump if below
# jbe       - jump if below or equals
# ja/jae    - jump if above / above or equals
#
# JB        CF = 1            (same as JC)
# JBE       CF = 1 | ZF = 1
# JA        CF = 0 & ZF = 0
# JAE       CF = 0            (same as JNC)
#
# For signed numbers we may use CMP and then check Sign, Overflow and Zero flag
# Instead of this we can use the following instructions
# jl/jle    - jump if less/equals
# jg/jge    - jump if greater/equals
#
# JG        SF = OF & ZF = 0
# JGE       SF = OF
# JL        SF != OF
# JLE       SF != OF | ZF = 1
# here https://www.udemy.com/x86-asm-foundations/learn/v4/t/lecture/1372092?start=0 you can get mathematical
#  prove of this table
# When the OF Overflow Flag = 0
# - No overflow occured - The result has the "correct" sign
#   - SF Sign Flag = 0 => a - b >= 0 => a >= b (OF = SF = 0)
#   - SF Sign Flag = 1 => a - b < 0 => a < b (0 = OF != SF = 1)
# When OF = 1
# - Overflow occured - The result has "wrong" sign
#   - SF = 0 => a < b (1 = OF != SF = 0)
# 0xfe - 0x7f (-2 - 127)
# 1111 1110 - 0111 1111 = 1111 1110 + 1000 0001
#   1111 1110
#   1000 0001
# 1 0111 1111 = 0x7F(127)
# o s
#
#   - SF = 1 => a >= b (OF = SF = 1)
# 7f - ff (127 - (-1))
# 0111 1111 - 1111 1111 = 0111 1111 + 0000 0001
#   0111 1111
#   0000 0001
#   1000 0000 = 0x80(-128)
#   s
#   o
#
# It all means that a >= b => OF = SF which is the JGE instruction,
#  other instructions can be derived from this
#
# NEG - Negate
# Find the 2's complement negation for a number (Flip all the bits and add 1)
# 0 0 0 0 0 0 1 1
# 1 1 1 1 1 1 0 1 - NEG
# When extending a signed number, for example from 8 bits to 16 bits,
#  the upper 8 bits will be filled with the sign bit of the number
#
# There are some instruction to deal with it
#
# MOVZX - Move Zero Extension - extend with filling zeros
#
# movzx ecx, al
#
# Same as
# mov ecx, 0
# mov cl, al
#
# MOVSX - Signed extension
# movsx eax, cl
#
# CBW - Convert byte to word, word is two bytes
# Sign extends AL to AX, AL - size of byte, AX - size of word
#
# CWDE - Convert word to double word extends (16 bits to 32 bits)
# Sign extends AX to EAX
#
# Two following instructions affects EDX register
# CWD - convert word to double word
# Sign extends AX to DX:AX (16 bits to 32 bits)
#
# CDQ - Convert double word to quad word
# Sign extends EAX to EDX:EAX (32 bits to 64 bits)
#
# IMUL/IDIV - Signed versions of MUL and DIV
# Understands the 2's complement representation
#
# You can think of it like this (may be implemented in different way):
# 1. Remember the original signs of the operands
# 2. Convert numbers to positive numbers
# 3. Invoke the corresponding operation (MUL/DIV)
# 4. Convert the result to negative if necessary
#
# CDQ is often combined with IDIV
# When call CDQ the content of edx will be filled with corresponding value
#  to perform the division edx:eax / arg
#
# Boolean operations
# NOT dest - Flip every bit in dest
# not eax
# not cl
# NOT is not the same as NEG
#
# AND dst, src - dest <- dst and src
# OR  dst, src - dest <- dst or src
# XOR dst, src - dest <- dst xor src
#
# Zeroing with XOR
# xor eax, eax - Very common piece of code in assembly programming
# eax will be set to zero after it, it is used because it has shorter instruction than mov eax, 0h
# xor eax, eax  31 c0 - machine code
# mov eax, 0    B8 00 00 00 00 - machine code
#
# Shifting bits
# Unsigned numbers
# SHL/SHR dst, k  - shift bits in dst to k positions left/right, fill with zeros from the right/left
# Shifting affects the carry flag, the last bit shifted out (from the left or right) will be kept in the Carry Flag
# k must be an 8-bit immediate value or the CL register
#
# Signed numbers
# Shift Arithmetic Left/Right
# SAL/SAR - SHL/SHR alternatives for signed numbers; SAR fills the vacated high bits with the sign bit of the original number.
# SAL - The same as SHL
#
# ROL/ROR dest, k - Rotate bits to the left/right on k position
# Carry Flag will contain the last rotated bit
# k - one byte size, or CL register
# mov al, 01001011b
# ror al, 1
# ->->->->->->->->to the start
# 0 1 0 0 1 0 1 1
# 1 0 1 0 0 1 0 1  CF=1
#
# Bit Games
# To extract the value of a specific bit you can create a mask, where
#  bits that you are interested in are marked as 1 and the others are 0,
#  then perform the AND operation with the value: if the result equals the mask
#  then the bit of interest is 1; if the result equals zero, then the bit is 0
#
# We can also perform ROR until the bit of interest reaches the Carry Flag, then check the Carry Flag, and then ROL back
#
# To pack small numbers in a register you may use the following technique
# Suppose we have 2 numbers
# al < 2^5
# bl < 2^3
#
# To pack them into dl do the following
# mov dl, al
# shl dl, 3   ; make room for bl
# or  dl, bl
#
# To unpack do the following
# mov cl, dl    ; make a copy of dl
# and dl, 111b  ; take the lowest 3 bits
# mov bl, dl
# shr cl, 3
# mov al, cl
#
# BSWAP - swap bytes in 32-bit container
# 11223344 -> 44332211
#
# MEMORY
# @@@@@@
#
# You can use a constant in your assembler code
#  SOME_CONST = 6
# Assembler will translate it into its value
#
# When using such instructions the assembled file can take a large size
#  because of memory will be allocated in the assembled file
#
# %define COUNT (100000/4) + 1
# section .data
#    keep_nums:
#    %rep COUNT
#        dd 0
#    %endrep
#
# To prevent such behaviour use .bss section instead of .data section
#  also in the .bss section there are different keywords to reserve memory
#
# section .bss
#     keep_nums: resd COUNT ; reserve COUNT dwords (resb would reserve only COUNT bytes)
#
# Addressing
# Byte (8 bits) is the basic unit of x86 memory management
#
# mov [eax], 3        ; Will not assemble - the operand size is not specified
# mov dword [eax], 3  ; Write 3 into the four bytes starting at address eax ([eax]=3, [eax+1]=0, [eax+2]=0, [eax+3]=0) (In little endian)
# add word [eax], 3   ; Add 3 into 2 bytes starting from eax
#
# One can use arithmetic operations in brackets (the operand size must still be specified)
# mov dword [eax + 1], 3
# mov dword [eax + esi], 3
# mov dword [eax + esi*2], 3
#
# Only one access to memory can be done in one instruction
# mov dword [eax], dword [edx] will not assemble
#
# Load Effective Address
# LEA dst, [expr]
# It calculates the address into the dest, doesn't access memory just calc the address,
#  doesn't change any flag. Also can be used for any other calculation
#
# lea eax, [eax+2*edx+5] - eax <- (eax+2*edx+5) mod 2^32
# Although LEA is used with brackets it doesn't access memory, it just calculate an expression
#  expression can not be too complex
#
# nums: memory for an array of some numbers
# lea eax, [nums+4*eax] - calculate address of a dword into eax
# mov eax, [nums+4*eax] - move a value of a dword at address nums+4*eax from memory into eax
# LEA can be used to fast calculate something
#
# STRUCTURES
#
# This is how to use structures, very simple
#
# STRUC Point
#    .x: RESD 1
#    .y: RESD 1
#    .size:
# ENDSTRUC
#
#section .bss
#    p1: RESB Point.size
#
#section .data
#    p2: ISTRUC Point
#    AT Point.x, DD 3
#    AT Point.y, DD 4
#IEND
#
#section .text
#
#main:
#
#    mov     dword [p1 + Point.x],  1
#    mov     dword [p1 + Point.y],  2
#
#    mov     eax, [p1 + Point.x]
#    call    print_eax
#
#    mov     eax, [p1 + Point.y]
#    call    print_eax
#
#    mov     eax, [p2 + Point.x]
#    call    print_eax
#
#    mov     eax, [p2 + Point.y]
#    mov     eax, [p2 + 4]        ; Same as above code
#    call    print_eax
#
#    ; Exit the process:
#    push	0
#    call	exit
#
# Here is a description of what's going on when declaring structure
#
# STRUC Point
#    .x: RESD 1   ; Offset 0
#    .y: RESD 1   ; Offset 4
#    .size:       ; Offset 8
# ENDSTRUC
# When adding Point.x to some address you will get an address basing on the offset inside the structure
#
# STRINGS
# @@@@@@@
#
# ASCII - American Standard Code for Information Interchange
# . English alphabet (With capitals)
# . Digits
# . Punctuation
# . Control codes
# Every symbol is 7 bits size
# Worldwide standard
# First used commercially in 1963 for teleprinters
#
# UTF8
# Encoding that implements unicode
# Very common
# First 128 symbols are equal to ASCII
#
# Examples of definition of strings
# section .data
#    str1:   db  'Hello World', 0
#
#    str2:   db  "Hello World", 0
#
#    str3:   db  48h,65h,6ch,6ch,6fh,0   ; Hello
#
#    str4:   db  'Hell'
#            db  'o world', 0
#
#    str5:   db  'Hello',20h,'world',0
#
#    str6:   db  'Line 1', 0ah
#            db  'Line 2'
#
# New Line
# . Windows - 0xd,0xa
# . Linux - 0xa
# 0xd - Carriage Return - Return to the beginning of the current line
# 0xa - Line feed - Advance the paper one line forward
# These commands were used to tell a remote printer how to print the data
#
# String instructions
# STOre String - STOS, it stores a string into memory
# STOSB - store value from al into [edi], edi advanced by 1 byte (According to DF)
# STOSW - [edi] <- ax, edi advanced by 2 bytes (According to DF)
# STOSD - [edi] <- eax, edi advanced by 4 bytes (According to DF)
#
# Direction Flag (DF) - Direction for string instructions
# 0 - pointer increases
# 1 - pointer decreases
#
# CLD - clear direction flag, set it to 0
# STD - set direction flag, set it to 1
#
# LODS - Load string from memory
# LODSB - al <- [esi], esi advanced by 1 byte (According to DF)
# LODSW - ax <- [esi], esi advanced by 2 bytes (According to DF)
# LODSD - eax <- [esi], esi advanced by 4 bytes (According to DF)
#
# MOVS - Moves string in memory
# MOVSB - Copy 1 byte, [edi] <- [esi], esi and edi increased by 1 byte (According to DF)
# MOVSW - Copy 2 bytes, [edi] <- [esi], esi and edi increased by 2 bytes (According to DF)
# MOVSD - Copy 4 bytes, [edi] <- [esi], esi and edi increased by 4 bytes (According to DF)
#
# Notes on MOVS
# . MOVS can access two memory addresses at one time
# . eSi - Source, eDi - destination
# . Very useful for copying data
# Copy example
#     mov esi, src_array
#     mov edi, dst_array
#     mov ecx, ARR_LEN
# copy_byte:
#     movsb
#     loop copy_byte
#
# JECXZ - jump if ecx is zero
#
# REP prefix
# rep   stosb - repeat stosb instruction ecx times
# rep works only with a few instructions (stos, lods, movs, scas, cmps)
# rep repeats instruction ecx times
# ecx will become zero when rep is done
#
# rep movsb - copy array
#
# SCAS - Scan String
# SCASB - compare al with [edi]. Set flags accordingly. Advance edi by 1 byte
# SCASW - compare ax with [edi]. Set flags accordingly. Advance edi by 2 byte
# SCASD - compare eax with [edi]. Set flags accordingly. Advance edi by 4 byte
# Flags is set like with cmp instruction, can be used to find a symbol in string
#
# mov ecx, 100
# REPNZ SCASB - repeat scasb max 100 times
# REPNZ - repeat as long as ZF = 0 and ecx != 0
# REPZ - repeat as long as ZF = 1 and ecx != 0
#
# Termination conditions
# Prefix              Flags             ecx
# REP                 -                 ecx = 0
# REPZ/REPE           ZF = 0            ecx = 0
# REPNZ/REPNE         ZF = 1            ecx = 0
#
# CMPS - Compare strings
# CMPSB - compare [esi] and [edi], set flags, advance esi and edi on 1 byte
# CMPSW - compare [esi] and [edi], set flags, advance esi and edi on 2 bytes
# CMPSD - compare [esi] and [edi], set flags, advance esi and edi on 4 bytes
# Can be used with REP
#
# mov esi, buf1
# mov edi, buf2
# mov ecx, BUF_SIZE
# repz cmpsd          ; stop when a mismatch is found or ecx reaches zero
#
# Subroutines
# @@@@@@@@@@@
#
# Stack
# PUSH arg
# . if arg is 16 bits. esp <- esp - 2, word [esp] <- arg
# . if arg is 32 bits. esp <- esp - 4, dword [esp] <- arg
#
# POP arg
# . if arg is 16 bits. arg <- word [esp], esp <- esp + 2,
# . if arg is 32 bits. arg <- dword [esp], esp <- esp + 4
#
# CALL func
# call pushes the address of the next instruction to stack and then change the instruction pointer
#
# RET pops the address from the stack and sets the instruction pointer to that address
#
# ret 8
# . pop a dword x from the stack
# . eip <- x
# . increase esp by 8
# This form can be used from inside the function, in the other way the
# add esp, 8 can be used after the function call (assume that the function has 2 args that was
#  previously pushed to stack)
# According to the conventions the return value should be stored in eax.
#
# EBP - Extended Base Pointer
# . Historically ESP could not be used to direct access memory. dword [esp + 4] not possible
# . EBP was used instead.
# These days ESP can be used to access memory directly, EBP is used to "hold" the stack frame.
#
# ENTER N,0 (enter the function) same as
# push ebp
# mov ebp, esp
# sub esp, N
# Almost all compilers emit this three-instruction sequence instead of using the ENTER instruction
#
#
# LEAVE (leave the function) same as
# mov esp, ebp
# pop ebp
#
#
	# Some assembler instructions
	#
	# x86 - Little endian architecture, bytes stores in reversed order
	#
	# Move (MOV)
	# @@@@@@@@@@
	#
	# mov dst, src - Copy data from source to destination
	# mov eax, 8CBh - Put 8CB number to 32-bit eax register, h at the end is for converting number to HEX representation (base 16).
	# mov ecx, ebx - Copy 32-bit number from ebx to ecx.
	# mov si, cx - Copy a number from cx to si, 16-bit copy.
	#
	# Source and Destination must have the same size
	# mov ecx, dh - Invalid instruction ecx - 32-bits size, dh - 8-bits size.
	#
	# Addition (ADD)
	# @@@@@@@@@@@@@@
	# add dst, src - dst <- dst + src, dest will change, source will stay the same.
	# The result of adding will be truncated in case of overflow
	# add eax, edx - eax <- (eax + edx) mod 2^32
	# add eax, 11b - eax <- (eax + 11b) mod 2^32, 11b - b - binary (base 2), 11b = 3
	#
	# ! Source and Destination should be campatible by size
	#
	# Subtraction (SUB)
	# sub dst, src - dst <- dst - src or dst <- dst + (-src), negation of source is done with 2's negation method.
	# sub eax, edx - eax <- eax - edx
	# sub esi, 4h - esi <- esi - 4h, h for hexadecimal.
	#
	# Increment (INC) and Decrement (DEC)
	# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	# inc eax - eax <- (eax + 1) mod 2^32
	# dec si - si <- (si - 1) mod 2^16
	#
	# Multiplication (MUL)
	# MUL arg - Unsigned multiplication
	# Some forms:
	# 1. ax <- al * arg - if argument is of size 8 bits.
	# 2. dx:ax <- ax * arg - if argument is of size 16 bits.
	# 3. edx:eax <- eax * arg - if argument is of size 32 bits.
	#
	# edx:eax means bits concatenation in both registers
	# The size of the result is larger than the argument size (Twise the amount of bits).
	#
	# mul ecx - edx:eax <- eax * ecx
	# mul si - dx:ax <- ax * si
	# mul al - ax <- al * al
	#
	# multiplication can accept as argument only the register
	# mul 2Ch - invalid
	#
	# Division (DIV)
	# Unsigned division
	# Some forms:
	# 1. If arg is 8 bits size
	# al <- ax / arg - Quotient
	# ah <- ax % arg - Remainder
	# 2. If arg is 16 bits size
	# ax <- dx:ax / arg
	# dx <- dx:ax % arg
	# 3. If arg is 32 bits size
	# eax <- edx:eax / arg
	# edx <- edx:eax % arg
	# div ch - ch - 8 bits size, al <- ax / ch, ah <- ax % ch
	# div esi - esi - 32 bits size, eax <- edx:eax / esi, edx <- edx:eax % esi
	# div di - di - 16 bits size, ax <- dx:ax / di, dx <- dx:ax % di
	# div 5Ah - invalid example
	# Exception will raise in case of division by zero and quotient overflow
	#
	# Instruction Pointer
	# EIP - Extended Instruction Pointer (RIP in 64 bits)
	# If we want to execute code from different location we should change the Instruction Pointer,
	# it contains the address of the current instruction.
	# In protected mode it could not be changed directly (mov eip,eax - invalid)
	#
	# Jump (JMP) allows to set a value of EIP
	# Jump will continue program from given address
	#
	# jmp ecx - change eip value to what in ecx, continue program from the address in ecx
	# jmp 777d1044h - eip <- 0x777d1044h
	# jmp my_label - jump to label, labels are special marks in code, it ends with : symbol
	#
	# Labels in code will be translated into the actual memory addresses on the assembling stage
	# While the program is running eip is always points to the current instruction address
	#
	# Jump can be relative or absolute
	# Relative means set eip to current location plus some bytes
	# Absolute means jump to some specific address
	# Assembler choose what to use by himself, often it picks the relative one because it shorter
	#
	#
	# Flags Register
	# Used to make branches and decisions in code
	# Some specific flags inside the flags register
	#
	# Zero flag
	# Sign flag
	# Carry flag
	# Overflow flag
	#
	# No direct access to this register, every bit in the register is a flag that represents True or False
	# This is how it looks like https://ru.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D0%B8%D1%81%D1%82%D1%80_%D1%84%D0%BB%D0%B0%D0%B3%D0%BE%D0%B2
	#
	# Zero flag is set to 1 when the last calculation had the zero result
	# it cleared (set to 0) whenever the last calculation had non zero result
	# mov eax, 3h
	# mov ecx, 3h
	# sub eax, ecx
	# Zero flag will be set to 1
	#
	# mov eax, 3h
	# mov ecx, 3h
	# add eax, ecx
	# After the add the Zero flag will be set to 0
	#
	# Sign flag equals the most significant bit of the last result
	# 0 if result is positive in 2's complement form
	# 1 if result is negative
	# mov edx, 0
	# dec edx
	# Sign flag will be set to 1, result is negative
	#
	# mov edx, 0
	# inc edx
	# Sign flag will be set to 0, result is positive
	#
	# Carry flag
	# It understands unsigned addition and subtraction,
	# it set if the addition of two numbers causes carry out the most significant bit (Leftmost bit)
	# mov eax, 0ffffffffh
	# add eax, 1
	# Carry flag is set to 1 (note: inc would not do this, INC and DEC leave the Carry flag unchanged)
	# Tells you that the result of adding two unsigned numbers is wrong
	#
	# It also set if the subtraction requires a borrow into the most significant bit
	# mov ecx, 0
	# mov edx, 11b
	# sub ecx, edx
	# if there is no bits to borrow then we borrow from some imaginary bit (1 is out of the ecx)
	# ecx  1 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 |
	# edx  - | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 1 1 |
	#        | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 1 1 | 1 1 1 1 1 1 0 1 |
	# Carry flag is set to 1
	#
	# Overflow flag
	# Understands signed addition and subtraction
	# Addition:
	# Set if the addition of two positive numbers has a negative result
	# Set if the addition of two negative numbers has a positive result
	# Subtraction:
	# Set if "+" - "-" = "-"
	# Set if "-" - "+" = "+"
	# Tells you if there is something wrong with your signed addition or subtraction
	# "+" + "-", "+" - "+", "-" - "-" can not set the overflow flag because the result can be
	# both positive or negative
	# To clear the overflow flag you can add positive and negative numbers
	#
	# mov al, 7fh
	# mov cl, 1h
	# add al, cl
	#     | 7 | f |
	# al   01111111 - positive, leading 0
	#     | 0 | 1 |
	# cl   00000001 - positive, leading 0
	# res  10000000 - negative in 2's complement form - 0x80
	# Overflow flag is set "+" + "+" = "-"
	#
	# If work with unsigned numbers look at carry flag
	# If work with signed numbers look at overflow flag
	#
	# Basic comditional branching
	# Jumping according to flags
	# JMP - Uncoditional jump
	# J<xx> - xx - some condition - conditional jump
	# JZ - jump if zero flag is set - 1
	# JNZ - jump if zero flaf is not set - 0
	# JS/JNS - Check signed flag
	# JC/JNC - Check carry flag
	# JO/JNO - Check overflow flag
	# Jump instruction it is indirect way to read the flags register
	#
	# To compare 2 unsigned numbers you can do something like this
	# mov eax, 99h
	# mov ebx, 100h
	# sub eax, ebx
	# jc a_lt_b
	# jnc b_lt_a
	# Via the subtraction the carry flag will be set in case of the first number is less then the second
	# But in this case the value of eax will be overwritten, to prevent this we could use CMP
	# CMP worls like SUB but it doesn't store the result, just affect flags
	# To compare unsigned numbers use staff like this
	# cmp a, b
	# jb - jump if below
	# jbe - jump if below or equals
	# ja/jae - jump if above or equals
	#
	# JB CF = 1 (same as JC)
	# JBE CF = 1 | ZF = 1
	# JA CF = 0 & ZF = 0
	# JAE CF = 0 (same as JNC)
	#
	# For signed numbers we may use CMP and then check Sign, Overflow and Zero flag
	# Instead of this we can use the following instructions
	# jl/jle - jump if less/equals
	# jg/jge - jump if greater/equals
	#
	# JG SF = OF & ZF = 0
	# JGE SF = OF
	# JL SF != OF
	# JLE SF != OF | ZF = 1
	# here https://www.udemy.com/x86-asm-foundations/learn/v4/t/lecture/1372092?start=0 you can get mathematical
	# prove of this table
	# When the OF Overflow Flag = 0
	# - No overflow occured - The result has the "correct" sign
	# - SF Sign Flag = 0 => a - b >= 0 => a >= b (OF = SF = 0)
	# - SF Sign Flag = 1 => a - b < 0 => a < b (0 = OF != SF = 1)
	# When OF = 1
	# - Overflow occured - The result has "wrong" sign
	# - SF = 0 => a < b (1 = OF != SF = 0)
	# 0xfe - 0x7f (-2 - 127)
	# 1111 1110 - 0111 1111 = 1111 1110 + 1000 0001
	# 1111 1110
	# 1000 0001
	# 1 0111 1111 = 0x7F(127)
	# o s
	#
	# - SF = 1 => a >= b (OF = SF = 1)
	# 7f - ff (127 - (-1))
	# 0111 1111 - 1111 1111 = 0111 1111 + 0000 0001
	# 0111 1111
	# 0000 0001
	# 1000 0000 = 0x80(-128)
	# s
	# o
	#
	# It all means that a >= b => OF = SF which is the JGE instruction,
	# other instructions can be derived form this
	#
	# NEG - Negate
	# Find the 2's complement negation for a number (Flip all the bits and add 1)
	# 0 0 0 0 0 0 1 1
	# 1 1 1 1 1 1 0 1 - NEG
	# When extending signed number from for example 8 bits to 16 bits
	# first 8 bits will be filled with sign bit of the number
	#
	# There are some instruction to deal with it
	#
	# MOVZX - Move Zero Extension - extend with filling zeros
	#
	# movzx ecx, al
	#
	# Same as
	# mov ecx, 0
	# mov cl, al
	#
	# MOVSX - Signed extension
	# movsx eax, cl
	#
	# CBW - Convert byte to word, word is two bytes
	# Sign extends AL to AX, AL - size of byte, AX - size of word
	#
	# CWDE - Convert word to double word extends (16 bits to 32 bits)
	# Sign extends AX to EAX
	#
	# Two following instructions affects EDX register
	# CWD - convert word to double word
	# Sign extends AX to DX:AX (16 bits to 32 bits)
	#
	# CDQ - Convert double word to quad word
	# Sign extends EAX to EDX:EAX (32 bits to 64 bits)
	#
	# IMUL/IDIV - Signed versions of MUL and DIV
	# Understands the 2's complement representation
	#
	# You can think of it like this (may be implemented in different way):
	# 1. Remember the original signs of the operands
	# 2. Convert numbers to positive numbers
	# 3. Invoke the corresponding operation (MUL/DIV)
	# 4. Convert the result to negative if necessary
	#
	# CDQ is often combined with IDIV
	# When call CDQ the content of edx will be filled with corresponding value
	# to perform the division edx:eax / arg
	#
	# Boolean operations
	# NOT dest - Flip every bit in dest
	# not eax
	# not cl
	# NOT is not the same as NEG
	#
	# AND dst, src - dest <- dst and src
	# OR dst, src - dest <- dst or src
	# XOR dst, src - dest <- dst xor src
	#
	# Zeroing with XOR
	# xor eax, eax - Very common piece of code in assembly programming
	# eax will be set to zero after it, it is used because it has shorter instruction than mov eax, 0h
	# xor eax, eax 31 c0 - machine code
	# mov eax, 0 B8 00 00 00 00 - machine code
	#
	# Shifting bits
	# Unsigned numbers
	# SHL/SHR dst, k - shift bits in dst to k positions left/right, fill with zeros from the right/left
	# Shifting affects the carry flag, last kicked bit (from left or right) will be kept it the Carry Flag
	# k must be size of 1 byte or the CL register
	#
	# Signed numbers
	# Shift Ariphmetic Left/Right
	# SAR/SAL - SHL/SHR alternatives for signed numbers, fill gaps according to the sign of original number.
	# SAL - The same as SHL
	#
	# ROL/ROR dest, k - Rotate bits to the left/right on k position
	# Carry Flag will contain the last rotated bit
	# k - one byte size, or CL register
	# mov al, 01001011b
	# ror al, 1
	# ->->->->->->->->to the start
	# 0 1 0 0 1 0 1 1
	# 1 0 1 0 0 1 0 1 CF=1
	#
	# Bit Games
	# To extract the value of a specific bit you can create a mask, where
	# bits that you are interested in is marked as 1 and the others are 0
	# than perform the AND opertion with the value and check if the mask is the same
	# then interested bit is 1 else it is 0, if the mask is equals to zero, then the bit is 0
	#
	# We can also perform ROR until interested bit, than check the Carry Flag, and then ROL back
	#
	# To pack small numbers in a register you may use the following technique
	# Suppose we have 2 numbers
	# al < 2^5
	# bl < 2^3
	#
	# To pack them into dl do the following
	# mov dl, al
	# shl dl, 3 ; make room for bl
	# or dl, bl
	#
	# To unpack do the following
	# mov cl, dl ; make a copy of
	# and dl, 111b ; tale the lowest 3 bits
	# mov bl, dl
	# shr cl, 3
	# mov al, cl
	#
	# BSWAP - swap bytes in 32-bit container
	# 11223344 -> 44332211
	#
	# MEMORY
	# @@@@@@
	#
	# You can use a constant in your assembler code
	# SOME_CONST = 6
	# Assembler will translate in into it's value
	#
	# When using definitions like the one below, the assembled file can become very large
	# because the memory is allocated inside the assembled file itself
	#
	# %define COUNT (100000/4) + 1
	# section .data
	# keep_nums:
	# %rep COUNT
	# dd 0
	# %endrep
	#
	# To prevent such behaviour use the .bss section instead of the .data section;
	# the .bss section also uses different directives (resb, resw, resd) to reserve memory
	#
	# section .bss
	# keep_nums: resd COUNT ; reserve COUNT dwords (matches the COUNT "dd 0" entries above)
	#
	# Addressing
	# The byte (8 bits) is the basic unit of x86 memory management
	#
	# mov [eax], 3 ; Will not assemble: the operand size is not specified
	# mov dword [eax], 3 ; Write 3 into the four bytes starting at address eax ([eax]=3, [eax+1]=0, [eax+2]=0, [eax+3]=0) (little endian)
	# add word [eax], 3 ; Add 3 into 2 bytes starting from eax
	#
	# One can use arithmetic expressions inside the brackets
	# mov dword [eax + 1], 3
	# mov dword [eax + esi], 3
	# mov dword [eax + esi*2], 3
	#
	# Only one access to memory can be done in one instruction
	# mov dword [eax], dword [edx] will not assemble
	#
	# Load Effective Address
	# LEA dst, [expr]
	# It calculates the address into the dest, doesn't access memory just calc the address,
	# doesn't change any flag. Also can be used for any other calculation
	#
	# lea eax, [eax+2*edx+5] - eax <- (eax + 2*edx + 5) mod 2^32
	# Although LEA is used with brackets it doesn't access memory, it just calculate an expression
	# expression can not be too complex
	#
	# nums: memory for an array of some numbers
	# lea eax, [nums+4*eax] - calculate address of a dword into eax
	# mov eax, [nums+4*eax] - move the dword stored at address nums+4*eax from memory into eax
	# LEA can be used to fast calculate something
	#
	# STRUCTURES
	#
	# This is how to use structures, very simple
	#
	# STRUC Point
	# .x: RESD 1
	# .y: RESD 1
	# .size:
	# ENDSTRUC
	#
	#section .bss
	# p1: RESB Point.size
	#
	#section .data
	# p2: ISTRUC Point
	# AT Point.x, DD 3
	# AT Point.y, DD 4
	#IEND
	#
	#section .text
	#
	#main:
	#
	# mov dword [p1 + Point.x], 1
	# mov dword [p1 + Point.y], 2
	#
	# mov eax, [p1 + Point.x]
	# call print_eax
	#
	# mov eax, [p1 + Point.y]
	# call print_eax
	#
	# mov eax, [p2 + Point.x]
	# call print_eax
	#
	# mov eax, [p2 + Point.y]
	# mov eax, [p2 + 4] ; Same as above code
	# call print_eax
	#
	# ; Exit the process:
	# push 0
	# call exit
	#
	# Here is a description of what's going on when declaring structure
	#
	# STRUC Point
	# .x: RESD 1 ; Offset 0
	# .y: RESD 1 ; Offset 4
	# .size: ; Offset 8
	# ENDSTRUC
	# When adding Point.x to some address you get an address based on the field's offset inside the structure
	#
	# STRINGS
	# @@@@@@@
	#
	# ASCII - American Standard Code for Information Interchange
	# . English alphabet (With capitals)
	# . Digits
	# . Punctuation
	# . Control codes
	# Every symbol is 7 bits size
	# Worldwide standard
	# First used commercially in 1963 for teleprinters
	#
	# UTF8
	# Encoding that implements unicode
	# Very common
	# The first 128 symbols are identical to ASCII
	#
	# Examples of definition of strings
	# section .data
	# str1: db 'Hello World', 0
	#
	# str2: db "Hello World", 0
	#
	# str3: db 48h,65h,6ch,6ch,6fh,0 ; Hello
	#
	# str4: db 'Hell'
	# db 'o world', 0
	#
	# str5: db 'Hello',20h,'world',0
	#
	# str6: db 'Line 1', 0ah
	# db 'Line 2'
	#
	# New Line
	# . Windows - 0xd,0xa
	# . Linux - 0xa
	# 0xd - Carriage Return - Return to the beginning of the current line
	# 0xa - Line feed - Advance the paper one line forward
	# These commands were used to tell a remote printer how to print the data
	#
	# String instructions
	# STOre String - STOS stores a string into memory
	# STOSB - store value from al into [edi], edi advanced by 1 byte (According to DF)
	# STOSW - [edi] <- ax, edi advanced by 2 bytes (According to DF)
	# STOSD - [edi] <- eax, edi advanced by 4 bytes (According to DF)
	#
	# Direction Flag (DF) - Direction for string instructions
	# 0 - pointer increases
	# 1 - pointer decreases
	#
	# CLD - clear direction flag, set it to 0
	# STD - set direction flag, set it to 1
	#
	# LODS - Load string from memory
	# LODSB - al <- [esi], esi advanced by 1 byte (According to DF)
	# LODSW - ax <- [esi], esi advanced by 2 bytes (According to DF)
	# LODSD - eax <- [esi], esi advanced by 4 bytes (According to DF)
	#
	# MOVS - Moves string in memory
	# MOVSB - Copy 1 byte, [edi] <- [esi], esi and edi increased by 1 byte (According to DF)
	# MOVSW - Copy 2 bytes, [edi] <- [esi], esi and edi increased by 2 bytes (According to DF)
	# MOVSD - Copy 4 bytes, [edi] <- [esi], esi and edi increased by 4 bytes (According to DF)
	#
	# Notes on MOVS
	# . MOVS can access two memory addresses at one time
	# . eSi - Source, eDi - destination
	# . Very useful for copying data
	# Copy example
	# mov esi, src_array
	# mov edi, dst_array
	# mov ecx, ARR_LEN
	# copy_byte:
	# movsb
	# loop copy_byte
	#
	# JECXZ - jump if ecx is zero
	#
	# REP prefix
	# rep stosb - repeat the stosb instruction ecx times
	# rep works only with a few instructions (stos, lods, movs, scas, cmps)
	# rep repeats instruction ecx times
	# ecx will become zero when rep is done
	#
	# rep movsb - copy array
	#
	# SCAS - Scan String
	# SCASB - compare al with [edi]. Set flags accordingly. Advance edi by 1 byte
	# SCASW - compare ax with [edi]. Set flags accordingly. Advance edi by 2 bytes
	# SCASD - compare eax with [edi]. Set flags accordingly. Advance edi by 4 bytes
	# Flags are set as with the cmp instruction; can be used to find a symbol in a string
	#
	# mov ecx, 100
	# REPNZ SCASB - repeat scasb max 100 times
	# REPNZ - repeat as long as ZF = 0 and ecx != 0
	# REPZ - repeat as long as ZF = 1 and ecx != 0
	#
	# Termination conditions (the prefix stops when either condition holds)
	# Prefix       Flags    ecx
	# REP          -        ecx = 0
	# REPZ/REPE    ZF = 0   ecx = 0
	# REPNZ/REPNE  ZF = 1   ecx = 0
	#
	# CMPS - Compare strings
	# CMPSB - compare [esi] and [edi], set flags, advance esi and edi on 1 byte
	# CMPSW - compare [esi] and [edi], set flags, advance esi and edi on 2 bytes
	# CMPSD - compare [esi] and [edi], set flags, advance esi and edi on 4 bytes
	# Can be used with REP
	#
	# mov esi, buf1
	# mov edi, buf2
	# mov ecx, BUF_SIZE
	# repz cmpsd ; stop when a mismatch is found or ecx reaches zero
	#
	# Subroutines
	# @@@@@@@@@@@
	#
	# Stack
	# PUSH arg
	# . if arg is 16 bits. esp <- esp - 2, word [esp] <- arg
	# . if arg is 32 bits. esp <- esp - 4, dword [esp] <- arg
	#
	# POP arg
	# . if arg is 16 bits. arg <- word [esp], esp <- esp + 2,
	# . if arg is 32 bits. arg <- dword [esp], esp <- esp + 4
	#
	# CALL func
	# call pushes the address of the next instruction to stack and then change the instruction pointer
	#
	# RET pops the address from the stack and sets the instruction pointer to that address
	#
	# ret 8
	# . pop a dword x from the stack
	# . eip <- x
	# . increase esp by 8
	# This form can be used inside the function to clean up its arguments; alternatively,
	# add esp, 8 can be used after the function call (assuming the function has 2 dword args that were
	# previously pushed to the stack)
	# According to the conventions the return value should be stored in eax.
	#
	# EBP - Extended Base Pointer
	# . Historically ESP could not be used to access memory directly. dword [esp + 4] was not possible
	# . EBP was used instead.
	# These days ESP can be used to access memory directly, EBP is used to "hold" the stack frame.
	#
	# ENTER N,0 (enter the function) same as
	# push ebp
	# mov ebp, esp
	# sub esp, N
	# Almost all the compilers produce such code when using ENTER
	#
	#
	# LEAVE (leave the function) same as
	# mov esp, ebp
	# pop ebp
	#
	#