pelson/pep691.py

## pep691.py
import requests
import zlib
import struct
import json
import html5lib
import re
import pprint

# Fixed 10-byte gzip member header (RFC 1952): magic 1f 8b, deflate method
# (08), no flags, zero mtime, extra-flags 02 and OS byte ff.
_gzip_header = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff"


def gzip_app_iter(app_iter):  # Taken from WebOb, which Warehouse uses
    """Lazily gzip an iterable of byte chunks.

    Yields the fixed gzip header, then every non-empty piece of deflate
    output produced while consuming ``app_iter``, and finally an 8-byte
    little-endian trailer holding the CRC-32 of the input and the total
    input length modulo 2**32.
    """
    # A negative wbits value requests a raw deflate stream; the gzip framing
    # (header and trailer) is emitted by hand around it.
    deflater = zlib.compressobj(
        level=9,
        method=zlib.DEFLATED,
        wbits=-zlib.MAX_WBITS,
        memLevel=zlib.DEF_MEM_LEVEL,
        strategy=0,
    )
    checksum = zlib.crc32(b"") & 0xFFFFFFFF
    total_len = 0

    yield _gzip_header

    for piece in app_iter:
        total_len += len(piece)
        checksum = zlib.crc32(piece, checksum) & 0xFFFFFFFF

        deflated = deflater.compress(piece)
        if not deflated:
            # Small inputs may be buffered inside the compressor and only
            # surface on a later call or on the final flush.
            continue
        yield deflated

    # The final flush can legitimately be empty as well.
    remainder = deflater.flush()
    if remainder:
        yield remainder

    yield struct.pack("<LL", checksum, total_len & 0xFFFFFFFF)


def normalize(name):  # Taken from PEP 503
    """Return the PEP 503 normalized form of a project name.

    Every run of ``-``, ``_`` and ``.`` characters is collapsed into a single
    ``-`` and the result is lower-cased.
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()


# Upper bound, in seconds, on how long a single HTTP connect or read may
# block; without it requests waits indefinitely on a stalled server.
REQUEST_TIMEOUT = 60

# Byte counts collected by this script, keyed by "<variant>.<un>compressed".
data = {}


# First pass: stream the index and count the body bytes exactly as they were
# transferred (decode_content=False leaves any content-encoding in place).
# The context manager releases the connection once the body is consumed.
with requests.get(
    "https://pypi.org/simple", stream=True, timeout=REQUEST_TIMEOUT
) as resp:
    resp.raise_for_status()

    data["current.compressed"] = 0
    for chunk in resp.raw.stream(1024, decode_content=False):
        data["current.compressed"] += len(chunk)


# Second pass: fetch the same page again and let requests decode the body.
resp = requests.get("https://pypi.org/simple", timeout=REQUEST_TIMEOUT)
resp.raise_for_status()


data["current.uncompressed"] = len(resp.content)


# Two candidate JSON documents: one keyed by normalized name carrying only
# the URL, and one that additionally records the name as written in the page.
jdata = {"meta": {"api-version": "1.0"}, "projects": {}}
jdata2 = {"meta": {"api-version": "1.0"}, "projects": {}}

html = html5lib.parse(resp.content, namespaceHTMLElements=False)
for link in html.findall(".//a"):
    # Normalize and look up the href once per anchor; both documents share them.
    key = normalize(link.text)
    href = link.attrib["href"]
    jdata["projects"][key] = {"url": href}
    jdata2["projects"][key] = {
        "name": link.text,
        "url": href,
    }


# Serialize compactly with sorted keys so the measured sizes are reproducible.
jcontent = json.dumps(jdata, sort_keys=True, separators=(",", ":")).encode("utf8")
jcontent2 = json.dumps(jdata2, sort_keys=True, separators=(",", ":")).encode("utf8")

data["pep691.uncompressed"] = len(jcontent)
data["pep691.name.uncompressed"] = len(jcontent2)


# Sum the gzip chunk lengths straight off the generator; no list is needed.
data["pep691.compressed"] = sum(map(len, gzip_app_iter([jcontent])))
data["pep691.name.compressed"] = sum(map(len, gzip_app_iter([jcontent2])))

pprint.pprint(data)
	import requests
	import zlib
	import struct
	import json
	import html5lib
	import re
	import pprint

	# Fixed 10-byte gzip member header (RFC 1952): magic 1f 8b, deflate method
	# (08), no flags, zero mtime, extra-flags 02 and OS byte ff.
	_gzip_header = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff"


	def gzip_app_iter(app_iter):  # Taken from WebOb, which Warehouse uses
	    """Lazily gzip an iterable of byte chunks.

	    Yields the fixed gzip header, then every non-empty piece of deflate
	    output produced while consuming ``app_iter``, and finally an 8-byte
	    little-endian trailer holding the CRC-32 of the input and the total
	    input length modulo 2**32.
	    """
	    size = 0
	    crc = zlib.crc32(b"") & 0xFFFFFFFF
	    # Negative wbits selects a raw deflate stream; the gzip framing is
	    # written by hand around it.
	    compress = zlib.compressobj(
	        9, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0
	    )

	    yield _gzip_header

	    for item in app_iter:
	        size += len(item)
	        crc = zlib.crc32(item, crc) & 0xFFFFFFFF

	        # The compress function may return zero length bytes if the input is
	        # small enough; it buffers the input for the next iteration or for a
	        # flush.
	        result = compress.compress(item)

	        if result:
	            yield result

	    # Similarly, flush may also not yield a value.
	    result = compress.flush()

	    if result:
	        yield result
	    yield struct.pack("<2L", crc, size & 0xFFFFFFFF)


	def normalize(name):  # Taken from PEP 503
	    """Return the PEP 503 normalized form of a project name.

	    Every run of ``-``, ``_`` and ``.`` characters is collapsed into a single
	    ``-`` and the result is lower-cased.
	    """
	    return re.sub(r"[-_.]+", "-", name).lower()


	# Upper bound, in seconds, on how long a single HTTP connect or read may
	# block; without it requests waits indefinitely on a stalled server.
	REQUEST_TIMEOUT = 60

	# Byte counts collected by this script, keyed by "<variant>.<un>compressed".
	data = {}


	# First pass: stream the index and count the body bytes exactly as they were
	# transferred (decode_content=False leaves any content-encoding in place).
	# The context manager releases the connection once the body is consumed.
	with requests.get(
	    "https://pypi.org/simple", stream=True, timeout=REQUEST_TIMEOUT
	) as resp:
	    resp.raise_for_status()

	    data["current.compressed"] = 0
	    for chunk in resp.raw.stream(1024, decode_content=False):
	        data["current.compressed"] += len(chunk)


	# Second pass: fetch the same page again and let requests decode the body.
	resp = requests.get("https://pypi.org/simple", timeout=REQUEST_TIMEOUT)
	resp.raise_for_status()


	data["current.uncompressed"] = len(resp.content)


	# Two candidate JSON documents: one keyed by normalized name carrying only
	# the URL, and one that additionally records the name as written in the page.
	jdata = {"meta": {"api-version": "1.0"}, "projects": {}}
	jdata2 = {"meta": {"api-version": "1.0"}, "projects": {}}

	html = html5lib.parse(resp.content, namespaceHTMLElements=False)
	for link in html.findall(".//a"):
	    # Normalize and look up the href once per anchor; both documents share them.
	    key = normalize(link.text)
	    href = link.attrib["href"]
	    jdata["projects"][key] = {"url": href}
	    jdata2["projects"][key] = {
	        "name": link.text,
	        "url": href,
	    }


	# Serialize compactly with sorted keys so the measured sizes are reproducible.
	jcontent = json.dumps(jdata, sort_keys=True, separators=(",", ":")).encode("utf8")
	jcontent2 = json.dumps(jdata2, sort_keys=True, separators=(",", ":")).encode("utf8")

	data["pep691.uncompressed"] = len(jcontent)
	data["pep691.name.uncompressed"] = len(jcontent2)


	# Sum the gzip chunk lengths straight off the generator; no list is needed.
	data["pep691.compressed"] = sum(map(len, gzip_app_iter([jcontent])))
	data["pep691.name.compressed"] = sum(map(len, gzip_app_iter([jcontent2])))

	pprint.pprint(data)