Skip to content

Instantly share code, notes, and snippets.

@mhasbini
Created November 5, 2020 03:53
Show Gist options
  • Save mhasbini/9f2070256b46238ad1d82e27931bd07e to your computer and use it in GitHub Desktop.
Save mhasbini/9f2070256b46238ad1d82e27931bd07e to your computer and use it in GitHub Desktop.
# Usage example: fetch a page over plain HTTP and inspect the result.
# NOTE(review): `httpy` is presumably the wrapper module defining `get(url)`
# and `Result` — confirm the file name. The `# =>` comments show the values
# expected for this particular URL.
import httpy
req = httpy.get('http://httpbin.org/robots.txt')
req.status # => 200
req.data # => 'User-agent: *\nDisallow: /deny'
from parsers import parse_url, parse_response
from request_helper import get as raw_get
class Result:
    """Outcome of an HTTP request: a status code and the response body.

    Both constructor arguments are stored unchanged as attributes.
    """

    def __init__(self, status, data):
        # status: status code taken from the response status line
        # data: response body
        self.status = status
        self.data = data

    def __repr__(self):
        # Readable representation so results are inspectable in a REPL/log.
        return f'{type(self).__name__}(status={self.status!r}, data={self.data!r})'
def get(url):
    """Fetch *url* and return the outcome wrapped in a ``Result``.

    The URL is split by ``parse_url``, the pieces are handed to the raw
    request helper, and the raw reply is decoded by ``parse_response``.
    Exceptions raised by any of those steps propagate to the caller.
    """
    target = parse_url(url)
    raw_reply = raw_get(*target)
    parsed = parse_response(raw_reply)
    return Result(*parsed)
import pyparsing as pp
def parse_url(url):
    """Parse url based on this format:
    http://[host[:port]]path[?query][#fragment]

    Returns a ``(host, port, path)`` tuple. ``port`` defaults to 80 when the
    URL does not carry one. The query string is kept as part of ``path``; the
    fragment is accepted but dropped.

    Raises ``ValueError('Invalid URL')`` when the URL does not match the
    format (including non-``http`` schemes) or the port is outside 1-65535.

    >>> parse_url('http://httpbin.org/')
    ('httpbin.org', 80, '/')
    >>> parse_url('http://httpbin.org/robots.txt')
    ('httpbin.org', 80, '/robots.txt')
    >>> parse_url('http://test:1234/lorem?a=b#c')
    ('test', 1234, '/lorem?a=b')
    >>> parse_url('http://my-host.example/a-b_c')
    ('my-host.example', 80, '/a-b_c')
    >>> parse_url('https://mhasbini.com/')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    >>> parse_url('httpmhasbini.com')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    >>> parse_url('http://test:-1/')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    """
    # Characters allowed after the leading '/': RFC 3986 unreserved and
    # sub-delims, ':' and '@', plus '%', '/' and '?' so that nested segments,
    # percent-escapes and the query string stay inside the path. Spelled out
    # as a plain string (not srange) so '-' cannot be misread as a range.
    url_chars = pp.alphanums + "-._~!$&'()*+,;=:@%/?"

    host_pp = pp.Word(pp.alphanums + '.-').setResultsName('host')
    # Unsigned: a port can never be negative.
    port_pp = pp.pyparsing_common.integer.setResultsName('port')
    path_pp = pp.Combine('/' + pp.Optional(pp.Word(url_chars))).setResultsName('path')
    fragment_pp = pp.Optional(
        pp.Combine('#' + pp.Optional(pp.Word(url_chars)))
    ).setResultsName('fragment')
    syntax_pp = 'http://' + host_pp + pp.Optional(':' + port_pp) + path_pp + fragment_pp

    try:
        # parseAll: reject trailing characters instead of silently cutting
        # the path short at the first unexpected one.
        result = syntax_pp.parseString(url, parseAll=True)
    except pp.ParseException:
        raise ValueError('Invalid URL') from None

    port = result.get('port', 80)
    if not 0 < port <= 65535:
        raise ValueError('Invalid URL')
    return result.get('host'), port, result.get('path')
def parse_response(raw_response):
    """Parse raw http response and return status code and body.

    Returns a ``(status, body)`` tuple where ``status`` is an int. Whitespace
    between the header block and the body, and the final newline of the body,
    are not part of the returned body (see the first example).

    Raises ``ValueError('Invalid raw response')`` when the text does not start
    with an ``HTTP/1.0`` or ``HTTP/1.1`` status line followed by a blank line.

    >>> parse_response('HTTP/1.1 200 OK\\r\\nDate: Thu, 05 Nov 2020 03:22:48 GMT\\r\\nContent-Type: text/plain\\r\\nContent-Length: 30\\r\\nConnection: close\\r\\nServer: gunicorn/19.9.0\\r\\nAccess-Control-Allow-Origin: *\\r\\nAccess-Control-Allow-Credentials: true\\r\\n\\r\\nUser-agent: *\\nDisallow: /deny\\n')
    (200, 'User-agent: *\\nDisallow: /deny')
    >>> parse_response('HTTP/1.0 404 NOT FOUND\\r\\n\\r\\nmissing')
    (404, 'missing')
    >>> parse_response('lorem ipsum')
    Traceback (most recent call last):
    ...
    ValueError: Invalid raw response
    """
    # Blank line separating the header block from the body.
    DELIMITER = '\r\n\r\n'

    # Servers may answer an HTTP/1.1 request with an HTTP/1.0 status line.
    version_pp = pp.oneOf('HTTP/1.0 HTTP/1.1')
    # Unsigned: a status code can never be negative.
    status_pp = pp.pyparsing_common.integer.setResultsName('status')
    # Everything up to the end of the input is the body.
    body_pp = pp.SkipTo(pp.Regex(r'$')).setResultsName('body')
    response_pp = (
        pp.LineStart() + version_pp + status_pp
        + pp.SkipTo(DELIMITER) + body_pp + pp.LineEnd()
    )

    try:
        result = response_pp.parseString(raw_response)
    except pp.ParseException:
        raise ValueError('Invalid raw response') from None
    return result.get('status'), result.get('body')
import socket
class ConnectionError(OSError):
    """Signals that a socket connection failed, whatever the cause.

    NOTE: inside this module the name shadows the builtin ``ConnectionError``.
    """
def get(host, port, path, *, timeout=None):
    """Open connection and send GET HTTP request and return raw response

    The request is sent over IPv4/TCP with ``Connection: close`` and the
    reply is read until the peer closes the connection, then decoded with the
    default codec (UTF-8).

    ``timeout`` (seconds, keyword-only) is applied to the socket when given;
    ``None`` leaves the socket's timeout untouched.

    Raises ``ConnectionError`` for any ``OSError`` raised while connecting,
    sending or receiving (timeouts included). A reply that is not valid UTF-8
    raises ``UnicodeDecodeError``.

    >>> get('httpbin.org', 80, '/robots.txt') # doctest:+ELLIPSIS
    'HTTP/1.1 200 OK\\r\\nDate: ...\\r\\nContent-Type: text/plain\\r\\nContent-Length: 30\\r\\nConnection: close\\r\\nServer: gunicorn/19.9.0\\r\\nAccess-Control-Allow-Origin: *\\r\\nAccess-Control-Allow-Credentials: true\\r\\n\\r\\nUser-agent: *\\nDisallow: /deny\\n'
    >>> get('mhasbini.com', 1234, '/robots.txt')
    Traceback (most recent call last):
    ...
    tt2.ConnectionError: [Errno 113] No route to host
    """
    # Generate request message
    request_m = (
        f'GET {path} HTTP/1.1\r\n'
        f'Host: {host}:{port}\r\n'
        'Connection: close\r\n'
        '\r\n'
    )

    chunks = []
    try:
        # AF_INET -> ipv4
        # SOCK_STREAM -> TCP
        # The context manager closes the socket on every exit path, so a
        # failed connect/send/recv no longer leaks the descriptor.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if timeout is not None:
                sock.settimeout(timeout)
            sock.connect((host, port))
            sock.sendall(request_m.encode())
            # Get data 1024 bytes at a time until the peer closes.
            while True:
                chunk = sock.recv(1024)
                if not chunk:
                    break
                chunks.append(chunk)
    except OSError as e:
        raise ConnectionError(str(e)) from None
    # Join once instead of growing a bytes object chunk by chunk.
    return b''.join(chunks).decode()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment