Skip to content

Instantly share code, notes, and snippets.

@magiskboy
Created June 25, 2023 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save magiskboy/3f734d28b9d244f318c1dbac8d5624ad to your computer and use it in GitHub Desktop.
Save magiskboy/3f734d28b9d244f318c1dbac8d5624ad to your computer and use it in GitHub Desktop.
Simple HTTP parser, 1M req/s
# Parser states, numbered in the order the parts of an HTTP request are
# consumed: request method, request path, HTTP version, header lines, body.
(
    STATE_METHOD,
    STATE_PATH,
    STATE_HTTP_VERSION,
    STATE_HEADER,
    STATE_BODY,
) = range(5)
class ParserBase:
    """Incremental HTTP request parser that consumes input one byte at a time.

    The parser is a small state machine.  As each part of the request is
    recognised, the matching callback on ``protocol`` is invoked with
    ``bytes`` arguments:

    * ``on_method(method)``
    * ``on_path(path)``
    * ``on_http_version(version)``
    * ``on_header(name, value)`` -- once per header line
    * ``on_headers_completed()`` -- on the blank line that ends the headers
    * ``on_body(chunk)`` -- for every non-empty piece of body data

    Input may be split across any number of ``feed_data`` calls; partial
    tokens are kept in ``current_token`` between calls.
    """

    def __init__(self, protocol):
        """Create a parser that reports parse events to *protocol*."""
        self.protocol = protocol
        # Bytes of the token currently being accumulated.
        self.current_token = b""
        self.current_state = STATE_METHOD

    def parse_header(self, header: bytes) -> tuple[bytes, bytes]:
        """Split a raw ``name: value`` header line into its two parts.

        Args:
            header: One header line without its line terminator.

        Returns:
            ``(name, value)`` with surrounding whitespace stripped.  Only the
            first ``:`` separates name from value.

        Raises:
            ValueError: If *header* contains no ``:``.
        """
        name, separator, value = header.partition(b":")
        if not separator:
            raise ValueError(f"Header {header!r} is invalid")
        return name.strip(), value.strip()

    def feed_data(self, data: bytes) -> None:
        """Consume the next chunk of request bytes, firing protocol callbacks.

        Everything is handled as ``bytes`` so that the accumulated token, the
        delimiters and the values handed to ``parse_header`` and the protocol
        callbacks all share one type.

        Args:
            data: Raw bytes received from the peer; may be any fragment of
                the request.

        Raises:
            ValueError: If a header line contains no ``:``.
        """
        protocol = self.protocol
        for offset, byte in enumerate(data):
            if self.current_state == STATE_BODY:
                # No further framing is parsed once the headers are done:
                # hand over the rest of this chunk in one piece.
                protocol.on_body(data[offset:])
                return

            # Iterating bytes yields ints; rebuild a one-byte bytes object.
            self.current_token += bytes((byte,))
            token = self.current_token

            if self.current_state == STATE_METHOD:
                if token.endswith(b" "):
                    protocol.on_method(token[:-1])
                    self.current_token = b""
                    self.current_state = STATE_PATH
            elif self.current_state == STATE_PATH:
                if token.endswith(b" "):
                    protocol.on_path(token[:-1])
                    self.current_token = b""
                    self.current_state = STATE_HTTP_VERSION
            elif self.current_state == STATE_HTTP_VERSION:
                if token.endswith(b"\r\n"):
                    protocol.on_http_version(token[:-2])
                    self.current_token = b""
                    self.current_state = STATE_HEADER
            elif self.current_state == STATE_HEADER:
                if token.endswith(b"\r\n"):
                    header = token[:-2]
                    self.current_token = b""
                    if header:
                        protocol.on_header(*self.parse_header(header))
                    else:
                        # A blank line terminates the header section.
                        protocol.on_headers_completed()
                        self.current_state = STATE_BODY
class Parser(ParserBase):
    """HTTP request parser that scans for delimiters instead of single bytes.

    It fires the same protocol callbacks as the base class, but locates the
    end of each token with ``bytes.find`` so whole tokens are sliced out of
    the input at once.
    """

    def __init__(self, protocol):
        """Create a parser that reports parse events to *protocol*."""
        super().__init__(protocol)
        self.current_token = b""

    def feed_data(self, data: bytes) -> None:
        """Consume the next chunk of request bytes, firing protocol callbacks.

        The method and path are terminated by a space; the HTTP version and
        every header line are terminated by ``\\n``, with one preceding
        ``\\r`` removed when present.  An empty line ends the headers, after
        which all remaining data is passed to ``on_body`` unparsed.

        Args:
            data: Raw bytes received from the peer; may be any fragment of
                the request.

        Raises:
            ValueError: If a header line contains no ``:``.
        """
        protocol = self.protocol
        pos = 0
        size = len(data)
        while pos < size:
            state = self.current_state
            if state == STATE_BODY:
                protocol.on_body(data[pos:])
                return

            delimiter = b" " if state in (STATE_METHOD, STATE_PATH) else b"\n"
            end = data.find(delimiter, pos)
            if end == -1:
                # Token is incomplete: keep what we have for the next call.
                self.current_token += data[pos:]
                return

            token = self.current_token + data[pos:end]
            self.current_token = b""
            pos = end + 1

            if state == STATE_METHOD:
                protocol.on_method(token)
                self.current_state = STATE_PATH
            elif state == STATE_PATH:
                protocol.on_path(token)
                self.current_state = STATE_HTTP_VERSION
            else:
                # The token excludes the "\n" found above, so only the "\r"
                # of a CRLF terminator can still be attached to it.  Strip it
                # only when present so bare-LF lines are not truncated.
                line = token[:-1] if token.endswith(b"\r") else token
                if state == STATE_HTTP_VERSION:
                    protocol.on_http_version(line)
                    self.current_state = STATE_HEADER
                elif line:
                    protocol.on_header(*self.parse_header(line))
                else:
                    # Blank line: the header section is finished.
                    self.current_state = STATE_BODY
                    protocol.on_headers_completed()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment