Skip to content

Instantly share code, notes, and snippets.

@vimiix
Forked from the5fire/brower.py
Created April 10, 2018 11:13
Show Gist options
  • Save vimiix/44440e4c3bcd57d7bc53512c8245b525 to your computer and use it in GitHub Desktop.
Save vimiix/44440e4c3bcd57d7bc53512c8245b525 to your computer and use it in GitHub Desktop.
简单理解socket模拟浏览器请求
# author: the5fire.com
import re
import socket
from collections import namedtuple
# Captures everything after the ``http://`` scheme prefix ("host/path?query").
RE_URL = re.compile(r'http://(.*)')
# Captures the decimal value of the Content-Length header.
# - Raw bytes literal, so ``\d`` is a regex class rather than an invalid
#   string escape.
# - Case-insensitive, because HTTP header names are case-insensitive.
# - The look-behind rejects longer names that merely end in "Content-Length"
#   (e.g. "X-Content-Length").
RE_CONTENT_LENGTH = re.compile(
    rb'(?<![\w-])Content-Length:[ \t]*(\d+)', re.IGNORECASE)
# Blank line that terminates an HTTP header block.
EOF = '\r\n\r\n'
# ``header`` is the raw header block (bytes, without the terminating blank
# line); ``body`` is the raw payload (bytes).
Response = namedtuple('Response', ['header', 'body'])


class Browser:
    """Minimal HTTP/1.1 GET client built directly on a TCP socket.

    Only plain ``http://`` URLs on port 80 are supported. The response body
    is returned exactly as received: no chunked-transfer or content-encoding
    decoding is performed.
    """

    # Maximum number of bytes requested from the socket per ``recv`` call.
    RECV_SIZE = 4096

    def parse_url(self, url):
        """Split an ``http://`` URL into ``(domain, path)``.

        Args:
            url: URL string containing ``http://host[/path]``.

        Returns:
            Tuple ``(domain, path)``; ``path`` always starts with ``/`` and
            is ``'/'`` when the URL has no path component.

        Raises:
            ValueError: if the URL has no ``http://`` prefix or no host.
        """
        match = RE_URL.search(url)
        if match is None:
            raise ValueError('only http:// URLs are supported: %r' % (url,))
        # partition() never fails, so "http://host" (no slash) yields path "/".
        domain, _, path = match.group(1).partition('/')
        if not domain:
            raise ValueError('URL has no host: %r' % (url,))
        return domain, '/' + path

    def build_payload(self, domain, path, header):
        """Build the raw bytes of a GET request.

        Args:
            domain: value for the ``Host`` header.
            path: request target, e.g. ``/index.html``.
            header: optional mapping of extra header names to values.

        Returns:
            The UTF-8 encoded request, terminated by a blank line.
        """
        segments = [
            'GET %s HTTP/1.1' % path,
            'Host: %s' % domain,
        ]
        if header:
            segments.extend('%s: %s' % (k, v) for k, v in header.items())
        payload = '\r\n'.join(segments) + EOF
        return payload.encode('utf-8')

    def create_sock(self, domain):
        """Open a TCP connection to ``domain`` on port 80.

        Returns:
            The connected socket; the caller is responsible for closing it.

        Raises:
            OSError: if the connection cannot be established.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((domain, 80))
        except OSError:
            # Do not leak the descriptor when the connection fails.
            sock.close()
            raise
        return sock

    def get(self, url, header=None):
        """Send a GET request for ``url`` and return the parsed response.

        Args:
            url: ``http://`` URL to fetch.
            header: optional mapping of extra request headers.

        Returns:
            A ``Response`` with the raw header block and body as bytes.
        """
        domain, path = self.parse_url(url)
        print(domain, path)
        payload = self.build_payload(domain, path, header)
        sock = self.create_sock(domain)
        try:
            print(payload)
            sock.sendall(payload)
            return self.create_response(sock)
        finally:
            # Always release the connection, even if sending/reading fails.
            sock.close()

    def create_response(self, sock):
        """Read one HTTP response from ``sock``.

        The header block is accumulated until its terminating blank line has
        been seen, so it may span several ``recv`` calls, and a blank line
        inside the body is not mistaken for the header terminator. The body
        is then read until Content-Length bytes have arrived or, when that
        header is absent, until the peer closes the connection.

        Args:
            sock: object with a socket-style ``recv(bufsize)`` method.

        Returns:
            ``Response(header, body)``, both bytes. If the peer closes the
            connection before the header is complete, whatever was received
            is returned as ``header`` with an empty body.
        """
        header_end = EOF.encode('ascii')

        # Phase 1: buffer data until the end of the header block is found.
        buffered = bytearray()
        split_at = -1
        while split_at < 0:
            chunk = sock.recv(self.RECV_SIZE)
            if not chunk:
                # Peer closed the connection before finishing the header.
                return Response(header=bytes(buffered), body=b'')
            # The terminator may straddle two chunks, so re-scan the last
            # few bytes of the old data together with the new chunk.
            scan_from = max(0, len(buffered) - len(header_end) + 1)
            buffered += chunk
            split_at = buffered.find(header_end, scan_from)

        header = bytes(buffered[:split_at])
        pieces = [bytes(buffered[split_at + len(header_end):])]
        received = len(pieces[0])

        # Only the header is searched, so body bytes can never be mistaken
        # for a Content-Length line. None means "length unknown", which is
        # distinct from an explicit length of 0.
        length_match = RE_CONTENT_LENGTH.search(header)
        content_length = int(length_match.group(1)) if length_match else None

        # Phase 2: read the body.
        while content_length is None or received < content_length:
            chunk = sock.recv(self.RECV_SIZE)
            if not chunk:
                # Empty read: the peer closed the connection.
                break
            pieces.append(chunk)
            received += len(chunk)

        return Response(header=header, body=b''.join(pieces))
# Shared module-level client instance, available to importers of this module.
browser = Browser()


if __name__ == '__main__':
    # Demo: download a sample image over plain HTTP and save the raw body.
    image_url = 'http://imgsrc.baidu.com/baike/pic/item/e850352ac65c1038bcba9c0eb9119313b17e8932.jpg'
    response = browser.get(image_url)
    with open('download.jpg', 'wb') as image_file:
        image_file.write(response.body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment