Skip to content

Instantly share code, notes, and snippets.

@soura-b
Last active July 11, 2021 08:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save soura-b/1c285ef4d9bfc884e69db69fa68f6d20 to your computer and use it in GitHub Desktop.
Save soura-b/1c285ef4d9bfc884e69db69fa68f6d20 to your computer and use it in GitHub Desktop.
Download an image over HTTP using Python
# using socket
import socket
import time
HOST = 'data.pr4e.org'
PORT = 80
# HTTP/1.0 request: the server closes the connection once the whole response
# has been sent, which is what terminates the receive loop below.
REQUEST = b'GET http://data.pr4e.org/cover3.jpg HTTP/1.0\r\n\r\n'
HEADER_END = b'\r\n\r\n'  # a blank line (CRLF CRLF) separates header from body


def split_http_response(raw):
    """Split a raw HTTP response into its header and body.

    Args:
        raw: The complete response as bytes, header included.

    Returns:
        A ``(header, body)`` tuple of bytes. The CRLF CRLF separator is in
        neither part; only the first occurrence is treated as the separator,
        so binary bodies that happen to contain CRLF CRLF are kept intact.

    Raises:
        ValueError: If ``raw`` contains no CRLF CRLF marker, i.e. the response
            is truncated or is not HTTP at all.
    """
    header, separator, body = raw.partition(HEADER_END)
    if not separator:
        raise ValueError('no end-of-header marker (CRLF CRLF) in response')
    return header, body


def download_with_socket(filename='stuff.jpg', delay=0.25):
    """Fetch cover3.jpg over a raw socket and save the image body to a file.

    Args:
        filename: Path of the file the image bytes are written to.
        delay: Seconds to pause after each received chunk. The pause lets the
            server get "ahead" of us; with ``delay=0`` short reads (fewer
            than 5120 bytes per ``recv``) become more likely.

    Raises:
        OSError: On connection or file errors.
        ValueError: If the response has no header/body separator.
    """
    chunks = []
    count = 0
    # The context manager closes the socket even if recv/connect raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as mysock:
        mysock.connect((HOST, PORT))
        mysock.sendall(REQUEST)
        while True:
            data = mysock.recv(5120)  # receive at most 5120 bytes per call
            if not data:  # empty bytes means the server closed the connection
                break
            time.sleep(delay)
            count += len(data)
            print(len(data), count)
            chunks.append(data)

    # Join once at the end instead of re-copying the buffer on every chunk.
    picture = b''.join(chunks)

    # Look for the end of the header, then skip past it to the picture data.
    header, body = split_http_response(picture)
    print('Header length ', len(header))
    print(header.decode())

    # "with" guarantees the file is flushed and closed.
    with open(filename, 'wb') as fhand:
        fhand.write(body)


if __name__ == '__main__':
    download_with_socket()
# using urllib, which simplifies the code
import urllib.request, urllib.parse, urllib.error
def download_with_urllib(url='http://data.pr4e.org/cover3.jpg',
                         filename='cover3.jpg', chunk_size=100000):
    """Download ``url`` with urllib and save the response body to ``filename``.

    The body is copied in chunks so a large download never has to fit in
    memory all at once.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        filename: Path of the file the downloaded bytes are written to.
        chunk_size: Maximum number of bytes read per iteration.

    Returns:
        The total number of bytes copied.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the output file cannot be written.
    """
    size = 0
    # img is a file-like response object (not a string); both it and the
    # output file are closed by the "with" block even if the copy fails.
    with urllib.request.urlopen(url) as img, open(filename, 'wb') as fhand:
        while True:
            info = img.read(chunk_size)  # read at most chunk_size bytes
            if not info:  # empty bytes means the body is exhausted
                break
            size += len(info)
            # For small files, fhand.write(img.read()) would do in one step.
            fhand.write(info)
    print(size, ' characters copied')  # 230210 for cover3.jpg
    return size


if __name__ == '__main__':
    download_with_urllib()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment