Skip to content

Instantly share code, notes, and snippets.

@jamesladd
Created November 29, 2010 22:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamesladd/720718 to your computer and use it in GitHub Desktop.
Save jamesladd/720718 to your computer and use it in GitHub Desktop.
x86 Assembler - win32 overlapped non-blocking io.
; -----------------------------------------------------------------------------
; main.asm - FASt Server - Main FASt Server entry point and loader.
; http://www.jamesladdcode.com/
;
; -----------------------------------------------------------------------------
;
.686 ; create 32 bit code
.model flat, stdcall ; 32 bit memory model
option casemap :none ; case sensitive
; -----------------------------------------------------------------------------
;
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
includelib \masm32\lib\kernel32.lib
include \masm32\include\ws2_32.inc
include \masm32\include\wsock32.inc
includelib \masm32\lib\ws2_32.lib
includelib \masm32\lib\mswsock.lib ; winsock2 ms specific library.
include \masm32\include\masm32.inc
includelib \masm32\lib\masm32.lib
; -----------------------------------------------------------------------------
;
; Winsock 2 constants defined locally in this file.
WSA_FLAG_OVERLAPPED equ 01h ; flags argument given to WSASocket for the listening socket
WSA_IO_PENDING equ ERROR_IO_PENDING ; WSAGetLastError value treated as "send queued", not as a failure
; Buffer descriptor handed to WSARecv / WSASend: a length plus a pointer to
; the bytes. Field order is part of the binary layout, so do not reorder.
WSABUF struct
len DWORD ? ; buffer size in bytes
buf PBYTE ? ; address of the buffer
WSABUF ends
LPWSABUF typedef ptr WSABUF
; Server-wide state. A single instance ("server") lives in .data and its
; address is passed to fast_server_run and on to every IO worker thread.
FASTSERVER struct
wsadata WSADATA <> ; filled in by WSAStartup
ListenSocket SOCKET ? ; socket returned by WSASocket; bound and listened on
ListenAddr sockaddr_in <> ; local address/port given to bind
hIOCompletionPort HANDLE ? ; completion port the worker threads wait on
dwIOThreadCount DWORD ? ; number of worker threads (2 * processors + 2)
FASTSERVER ends
PFASTSERVER typedef ptr FASTSERVER
; Per-connection state. One CONTEXT_KEY is heap-allocated for each accepted
; socket and registered as that socket's completion key, so a worker thread
; gets the pointer back from GetQueuedCompletionStatus.
CONTEXT_KEY struct
socket SOCKET ? ; the accepted client socket
ovIn OVERLAPPED <> ; OVERLAPPED passed to WSARecv
ovOut OVERLAPPED <> ; OVERLAPPED passed to WSASend; hEvent holds an event handle with bit 0 set
dwRecvBytes DWORD ? ; byte-count output of WSARecv
dwSendBytes DWORD ? ; byte-count output of WSASend
dwFlags DWORD ? ; flags variable handed to WSARecv / WSASend
wsaInBuf WSABUF <> ; receive descriptor; pointed at chInBuf before each WSARecv
wsaOutBuf WSABUF <> ; send descriptor; pointed at chOutBuf before each WSASend
chInBuf db 32 dup(?) ; receive buffer (32 bytes)
chOutBuf db 32 dup(?) ; send buffer (32 bytes)
CONTEXT_KEY ends
PCONTEXT_KEY typedef ptr CONTEXT_KEY
; -----------------------------------------------------------------------------
;
.data
; The one server instance; its address is passed to fast_server_run by the
; program entry point.
server FASTSERVER <>
; Console messages. Each is a zero-terminated string that starts with CR,LF
; (13,10) and is written with StdOut.
; -- start-up failures reported by fast_server_run
szGetVersionExFailed db 13,10,"Failed to get Windows version.",0
szOsVersionIncorrect db 13,10,"Windows NT 3.51 or later required.",0
szCreateListenSocketFailed db 13,10,"Failed to create a socket to listen on.",0
szBindFailed db 13,10,"Failed to bind socket local address and port.",0
szCompletionPortFailed db 13,10,"Failed to create IO Completion Port.",0
; -- worker thread progress (io_thread_func)
szThreadStart db 13,10,"IO Worker Thread Started.",0
szThreadEnd db 13,10,"IO Worker Thread Ended.",0
szThreadEvent db 13,10,"IO Worker Thread IO Event.",13,10,0
; -- accept loop (fast_server_run)
szListening db 13,10,"Listening for connections.",13,10,0
szAcceptedSocket db 13,10,"Accepted new socket connection.",13,10,0
szAcceptInvalidSocket db 13,10,"Accept got an invalid socket.",0
; NOTE(review): the text below contains the typo "scoket"; it is runtime
; output, so it is left unchanged here.
szAcceptSocketAssociateFailed db 13,10,"Failed to associate accepted scoket with completion port.",0
; -- read / write / completion diagnostics
szReadFailed db 13,10,"Socket read failed.",0
szReadFailedFatal db 13,10,"Socket read failed *fatally*.",0
szGetQueuedCompletionStatusFailed db 13,10,"Get of a queued completion *failed*",13,10,0
szEndOfFile1 db 13,10,"End Of File (1).",13,10,0
szEndOfFile2 db 13,10,"End Of File (2).",13,10,0
szWriteFailed db 13,10,"Write to socket *failed*.",13,10,0
.code
; -----------------------------------------------------------------------------
;
align 4
; -----------------------------------------------------------------------------
; issue_read_request - queue an overlapped receive on one connection.
;
; ABI:   32-bit stdcall (callee pops its argument).
; In:    pContextKey = pointer to the connection's CONTEXT_KEY.
; Out:   eax = TRUE  if the receive completed or was queued; in both cases a
;                    completion packet is delivered to the completion port,
;                    so the data is processed by a worker thread, not here.
;        eax = FALSE if the receive could not be issued.
; Clobb: eax, ecx, edx, flags.
;
; The receive targets chInBuf (32 bytes) through wsaInBuf / ovIn. Transient
; resource errors are retried up to 6 times, 50 ms apart; the count is an
; arbitrary bound so we never loop forever.
; -----------------------------------------------------------------------------
issue_read_request proc pContextKey:PCONTEXT_KEY
    local dwLoopCounter:DWORD

    mov dwLoopCounter, 6

issue_read_loop:
    ; edx is volatile across calls, so reload the context on every attempt.
    mov edx, pContextKey
    mov [edx].CONTEXT_KEY.wsaInBuf.len, 32        ; size of chInBuf
    lea ecx, [edx].CONTEXT_KEY.chInBuf
    mov [edx].CONTEXT_KEY.wsaInBuf.buf, ecx
    ; dwFlags is an in/out argument of WSARecv: clear whatever an earlier
    ; operation left in it.
    mov [edx].CONTEXT_KEY.dwFlags, 0
    invoke WSARecv, [edx].CONTEXT_KEY.socket, addr [edx].CONTEXT_KEY.wsaInBuf, 1,
           addr [edx].CONTEXT_KEY.dwRecvBytes, addr [edx].CONTEXT_KEY.dwFlags,
           addr [edx].CONTEXT_KEY.ovIn, NULL
    ; WSARecv returns 0 when the read completed immediately. The completion
    ; packet is still queued, so there is nothing more to do here.
    test eax, eax
    jz issue_read_queued

    ; Non-zero: "pending" is the normal overlapped outcome, anything else is
    ; a real error.
    invoke WSAGetLastError
    cmp eax, WSA_IO_PENDING
    je issue_read_queued

    ; Errors worth retrying: temporary shortage of buffers / quota / memory.
    cmp eax, WSAENOBUFS
    je issue_read_retry
    cmp eax, ERROR_INVALID_USER_BUFFER
    je issue_read_retry
    cmp eax, ERROR_NOT_ENOUGH_QUOTA
    je issue_read_retry
    cmp eax, ERROR_NOT_ENOUGH_MEMORY
    je issue_read_retry

    ; Read failed with an error we cannot recover from.
    invoke StdOut, addr szReadFailedFatal
    jmp issue_read_failed

issue_read_retry:
    invoke Sleep, 50
    dec dwLoopCounter
    jnz issue_read_loop
    ; Retries exhausted.
    invoke StdOut, addr szReadFailed

issue_read_failed:
    xor eax, eax                                  ; FALSE
    ret

issue_read_queued:
    mov eax, TRUE
    ret
issue_read_request endp
; -----------------------------------------------------------------------------
;
align 4
; Size in bytes of CONTEXT_KEY.chInBuf / chOutBuf; must match the struct.
ECHO_BUF_BYTES equ 32
; -----------------------------------------------------------------------------
; release_connection - private helper: tear down one connection.
;
; ABI:   32-bit stdcall.
; In:    pContextKey = pointer to the connection's CONTEXT_KEY.
; Out:   nothing meaningful in eax.
; Clobb: eax, ecx, edx, flags.
;
; Closes the socket, closes the event stored in ovOut.hEvent (the handle is
; stored with bit 0 set, so the bit is stripped first) and returns the
; CONTEXT_KEY to the process heap.
; NOTE(review): if a send is still outstanding when this runs, the system may
; still reference ovOut / chOutBuf inside the freed block - confirm whether a
; deferred free is needed.
; -----------------------------------------------------------------------------
release_connection proc pContextKey:PCONTEXT_KEY
    mov edx, pContextKey
    invoke closesocket, [edx].CONTEXT_KEY.socket
    mov edx, pContextKey
    mov eax, [edx].CONTEXT_KEY.ovOut.hEvent
    and eax, 0FFFFFFFEh                           ; drop the "no completion packet" tag bit
    jz @F                                         ; no event was ever created
    invoke CloseHandle, eax
@@:
    invoke GetProcessHeap
    invoke HeapFree, eax, 0, pContextKey
    ret
release_connection endp
; -----------------------------------------------------------------------------
; io_thread_func - IO worker thread body (thread start routine).
;
; ABI:   32-bit stdcall.
; In:    pServer = pointer to the FASTSERVER whose completion port is served.
; Out:   eax = 0 (thread exit code) when the completion port itself fails.
; Clobb: eax, ecx, edx, flags.
;
; The thread blocks on the completion port until a completion packet arrives,
; processes it, then goes back to waiting:
;   * wait failed and nothing was dequeued -> report it and end the thread
;   * a failed I/O was dequeued, or a packet carries no OVERLAPPED
;                                          -> drop the connection (EOF 1)
;   * zero bytes were read                 -> peer closed, drop it (EOF 2)
;   * otherwise echo the received bytes and issue the next read
; -----------------------------------------------------------------------------
io_thread_func proc pServer:PFASTSERVER
    local hIOCompletionPort:HANDLE
    local dwNumRead:DWORD
    local pContextKey:PCONTEXT_KEY
    local pOverlapped:PTR OVERLAPPED

    invoke StdOut, addr szThreadStart
    mov edx, pServer
    mov ecx, [edx].FASTSERVER.hIOCompletionPort
    mov hIOCompletionPort, ecx

io_thread_func_loop:
    invoke GetQueuedCompletionStatus, hIOCompletionPort, addr dwNumRead,
           addr pContextKey, addr pOverlapped, INFINITE
    test eax, eax
    jnz io_packet_ok
    ; The call failed. With a non-NULL OVERLAPPED a packet for a failed I/O
    ; was dequeued, so only that connection is affected.
    cmp pOverlapped, NULL
    jne io_connection_lost
    ; Serious error: nothing was dequeued at all.
    invoke StdOut, addr szGetQueuedCompletionStatusFailed
    jmp io_thread_exit

io_packet_ok:
    ; A packet without an OVERLAPPED is not one of our reads; treat it like
    ; a lost connection, as the original code did.
    cmp pOverlapped, NULL
    je io_connection_lost
    ; Zero bytes transferred means end of stream.
    cmp dwNumRead, 0
    jne io_echo
    invoke release_connection, pContextKey
    invoke StdOut, addr szEndOfFile2
    jmp io_thread_func_loop

io_connection_lost:
    invoke release_connection, pContextKey
    invoke StdOut, addr szEndOfFile1
    jmp io_thread_func_loop

    ; We have read some data: copy it to the output buffer, write it back
    ; out (echo) and issue another read. Without a new read no more data
    ; arrives for this connection.
io_echo:
    mov edx, pContextKey
    mov ecx, dwNumRead
    cmp ecx, ECHO_BUF_BYTES                       ; never copy past the buffers
    jbe @F
    mov ecx, ECHO_BUF_BYTES
@@:
    mov [edx].CONTEXT_KEY.wsaOutBuf.len, ecx
    ; Copy chInBuf[0..ecx-1] to chOutBuf, last byte first. ecx >= 1 here.
io_copy_loop:
    mov al, [edx+ecx-1].CONTEXT_KEY.chInBuf
    mov [edx+ecx-1].CONTEXT_KEY.chOutBuf, al
    dec ecx
    jnz io_copy_loop
    lea ecx, [edx].CONTEXT_KEY.chOutBuf
    mov [edx].CONTEXT_KEY.wsaOutBuf.buf, ecx
    mov [edx].CONTEXT_KEY.dwFlags, 0
    ; TODO: Modify so we only send back out when buffer full or newline received.
    ; WSASend takes its flags BY VALUE (unlike WSARecv), so pass 0 rather
    ; than the address of dwFlags.
    invoke WSASend, [edx].CONTEXT_KEY.socket, addr [edx].CONTEXT_KEY.wsaOutBuf, 1,
           addr [edx].CONTEXT_KEY.dwSendBytes, 0,
           addr [edx].CONTEXT_KEY.ovOut, NULL
    test eax, eax
    jz io_next_read
    invoke WSAGetLastError
    cmp eax, WSA_IO_PENDING
    je io_next_read
    ; Failed to write data out.
    invoke StdOut, addr szWriteFailed

    ; Issue a new read.
io_next_read:
    invoke issue_read_request, pContextKey
    invoke StdOut, addr szThreadEvent
    jmp io_thread_func_loop

io_thread_exit:
    invoke StdOut, addr szThreadEnd
    xor eax, eax                                  ; thread exit code
    ret
io_thread_func endp
; -----------------------------------------------------------------------------
;
.data
; Messages used only by fast_server_run.
szWSAStartupFailed db 13,10,"Failed to start Windows Sockets.",0
szListenFailed db 13,10,"Failed to listen on socket.",0
szContextSetupFailed db 13,10,"Failed to set up connection context.",0
.code
align 4
; -----------------------------------------------------------------------------
; fast_server_run - start the server and accept connections forever.
;
; ABI:   32-bit stdcall.
; In:    pServer = pointer to the FASTSERVER instance to initialise and run.
; Out:   eax = 1 if start-up failed (a message has been printed). On success
;        the procedure never returns: it stays in the accept loop.
; Clobb: eax, ecx, edx, flags.
;
; Steps: check the Windows platform, start Winsock, create and bind the
; listening socket (port 9080, any address), create the IO completion port,
; start the worker threads, listen, then accept clients. Each accepted socket
; gets a heap-allocated CONTEXT_KEY, is associated with the completion port
; and has its first read issued.
; -----------------------------------------------------------------------------
fast_server_run proc pServer:PFASTSERVER
    local OSVersionInfo:OSVERSIONINFO
    local SystemInfo:SYSTEM_INFO
    local hIOCompletionPort:HANDLE
    local dwIOThreadCount:DWORD
    local dwThreadId:DWORD
    local saClient:sockaddr
    local dwClientSize:DWORD
    local acceptSocket:SOCKET
    local pContextKey:PCONTEXT_KEY

    ; Check we are running under the right version of Windows. We can only
    ; run under versions of Windows NT (3.51, 4.0) or later. Includes XP.
    lea edx, OSVersionInfo
    mov [edx].OSVERSIONINFO.dwOSVersionInfoSize, sizeof OSVERSIONINFO
    invoke GetVersionEx, edx
    test eax, eax
    jnz run_have_version
    ; Failed to get version information.
    invoke StdOut, addr szGetVersionExFailed
    jmp run_fail

run_have_version:
    lea edx, OSVersionInfo
    cmp [edx].OSVERSIONINFO.dwPlatformId, VER_PLATFORM_WIN32_NT
    je run_start_winsock
    ; Not running correct version of Windows.
    invoke StdOut, addr szOsVersionIncorrect
    jmp run_fail

    ; Start Windows Socket subsystem (request version 2.2). WSAStartup
    ; returns 0 on success.
run_start_winsock:
    mov edx, pServer
    invoke WSAStartup, 0202h, addr [edx].FASTSERVER.wsadata
    test eax, eax
    jz run_create_socket
    invoke StdOut, addr szWSAStartupFailed
    jmp run_fail

    ; Create a socket that we can listen on for client connections.
    ; By default sockets are overlapped but we explicitly create one
    ; overlapped.
run_create_socket:
    invoke WSASocket, AF_INET, SOCK_STREAM, 0, NULL, 0, WSA_FLAG_OVERLAPPED
    cmp eax, INVALID_SOCKET
    jne run_bind
    ; Failed to create a socket to listen on.
    invoke StdOut, addr szCreateListenSocketFailed
    jmp run_fail_winsock

run_bind:
    mov edx, pServer
    mov [edx].FASTSERVER.ListenSocket, eax
    ; Bind a local address and port that clients can connect to.
    ; 9080 is the chosen port.
    invoke htons, 9080d
    mov edx, pServer
    mov [edx].FASTSERVER.ListenAddr.sin_port, ax
    mov [edx].FASTSERVER.ListenAddr.sin_family, AF_INET
    mov [edx].FASTSERVER.ListenAddr.sin_addr.S_un.S_addr, INADDR_ANY
    invoke bind, [edx].FASTSERVER.ListenSocket, addr [edx].FASTSERVER.ListenAddr, sizeof sockaddr_in
    cmp eax, SOCKET_ERROR
    jne run_create_port
    ; Failed to bind local address and port.
    invoke StdOut, addr szBindFailed
    jmp run_fail_socket

    ; Create an IO Completion Port. The first time we do this we do not
    ; associate a socket with the port. We also never associate the
    ; listening socket with the completion port.
    ; The last argument (0) lets the IO subsystem use its default number of
    ; concurrently running threads.
run_create_port:
    invoke CreateIoCompletionPort, INVALID_HANDLE_VALUE, NULL, 0, 0
    test eax, eax
    jnz run_have_port
    ; Failed to create the IO completion port.
    invoke StdOut, addr szCompletionPortFailed
    jmp run_fail_socket

run_have_port:
    mov edx, pServer
    mov [edx].FASTSERVER.hIOCompletionPort, eax
    mov hIOCompletionPort, eax

    ; Create a set of IO worker threads. These threads block on the
    ; completion port and wait for completion events. We do not use
    ; completion routines, as a long running completion routine could starve
    ; the underlying subsystem from handling additional events.
    ; We create two threads per system processor plus two.
    ; The thread handle is closed right after creation. This does not stop
    ; the thread; we simply have no use for the handle.
    invoke GetSystemInfo, addr SystemInfo
    mov edx, SystemInfo.SYSTEM_INFO.dwNumberOfProcessors
    add edx, edx
    add edx, 2
    mov ecx, pServer
    mov [ecx].FASTSERVER.dwIOThreadCount, edx
    mov dwIOThreadCount, edx
run_spawn_worker:
    invoke CreateThread, NULL, 0, io_thread_func, pServer, 0, addr dwThreadId
    test eax, eax
    jz @F                                         ; creation failed: no handle to close
    invoke CloseHandle, eax
@@:
    dec dwIOThreadCount
    jnz run_spawn_worker

    ; Listen on the listening socket, which allows clients to connect.
    ; The second argument is the connection backlog.
    mov edx, pServer
    invoke listen, [edx].FASTSERVER.ListenSocket, 5
    cmp eax, SOCKET_ERROR
    jne run_listening
    invoke StdOut, addr szListenFailed
    ; The completion port and worker threads are left for process exit.
    jmp run_fail_socket

run_listening:
    invoke StdOut, addr szListening

    ; Loop forever accepting new connections and reading from them.
fast_server_run_loop:
    ; The address length is an in/out argument, so reset it for every call.
    mov dwClientSize, sizeof sockaddr
    ; Accept the next queued connection from a client.
    mov edx, pServer
    invoke WSAAccept, [edx].FASTSERVER.ListenSocket, addr saClient, addr dwClientSize, NULL, NULL
    cmp eax, INVALID_SOCKET
    jne run_accepted
    ; Invalid socket from accept.
    invoke StdOut, addr szAcceptInvalidSocket
    jmp fast_server_run_loop

run_accepted:
    mov acceptSocket, eax
    invoke StdOut, addr szAcceptedSocket
    ; Create a completion key to pass along with the accepted socket to the
    ; IO Completion Port. When IO completes on the socket, the worker thread
    ; receives this key together with the completion status.
    ; HEAP_ZERO_MEMORY returns the block already zero-filled.
    invoke GetProcessHeap
    invoke HeapAlloc, eax, HEAP_ZERO_MEMORY, sizeof CONTEXT_KEY
    test eax, eax
    jz run_setup_failed
    mov pContextKey, eax
    mov ecx, acceptSocket
    mov [eax].CONTEXT_KEY.socket, ecx
    ; Create an event and set the low bit of its handle in ovOut.hEvent so
    ; that completed writes do not queue packets to the completion port.
    ; We only need read completion events.
    invoke CreateEvent, NULL, TRUE, FALSE, NULL
    test eax, eax
    jz run_event_failed
    or eax, 01h
    mov edx, pContextKey
    mov [edx].CONTEXT_KEY.ovOut.hEvent, eax
    ; Associate the newly accepted socket and completion key with the
    ; IO Completion Port.
    invoke CreateIoCompletionPort, acceptSocket, hIOCompletionPort, edx, 0
    test eax, eax
    jnz run_first_read
    ; Failed to associate new socket with completion port: undo everything
    ; created for this connection.
    invoke StdOut, addr szAcceptSocketAssociateFailed
    mov edx, pContextKey
    mov eax, [edx].CONTEXT_KEY.ovOut.hEvent
    and eax, 0FFFFFFFEh                           ; strip the tag bit to get the real handle
    invoke CloseHandle, eax
    jmp run_free_context

run_event_failed:
    invoke StdOut, addr szContextSetupFailed
run_free_context:
    invoke GetProcessHeap
    invoke HeapFree, eax, 0, pContextKey
    jmp run_close_accepted

run_setup_failed:
    invoke StdOut, addr szContextSetupFailed
run_close_accepted:
    invoke closesocket, acceptSocket
    jmp fast_server_run_loop

    ; Issue a read request on the newly accepted socket. The read is queued
    ; and one of the worker threads is woken when it completes.
run_first_read:
    invoke issue_read_request, pContextKey
    jmp fast_server_run_loop

    ; Start-up failure exits, releasing what was acquired so far.
run_fail_socket:
    mov edx, pServer
    invoke closesocket, [edx].FASTSERVER.ListenSocket
run_fail_winsock:
    invoke WSACleanup
run_fail:
    mov eax, 1                                    ; failure code; the entry point uses it as exit code
    ret
fast_server_run endp
; -----------------------------------------------------------------------------
; Executable program starting/entry point.
;
start:
    ; Run the server on the global instance (stdcall: one pointer argument,
    ; removed from the stack by the callee).
    push offset server
    call fast_server_run
    ; Whatever fast_server_run left in eax becomes the process exit code.
    push eax
    call ExitProcess
end start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment