Created
January 4, 2020 09:44
-
-
Save rsotnychenko/3bee49bc9a8cba66286eedc8bae39620 to your computer and use it in GitHub Desktop.
Calculate size of docker registry v2 repo (dirty)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import re | |
import json | |
from pprint import pprint | |
import os | |
import struct | |
import sys | |
# The registry storage root is the first command-line argument; an optional
# second argument is used further down as a substring filter on link paths.
if len(sys.argv) < 2:
    # Exit with a readable hint instead of a bare IndexError traceback.
    sys.exit('usage: %s <registry-storage-root> [path-filter]' % sys.argv[0])

base_path = sys.argv[1] + '/v2'
repositories_path = base_path + '/repositories'
blobs_path = base_path + '/blobs'
class LayersRepo:
    """Memoising lookup of layer blob sizes, keyed by hex digest."""

    def __init__(self):
        # digest -> size in bytes, filled lazily by get_layer_size()
        self.layers = {}

    @staticmethod
    def _get_uncompressed_size(filename):
        """Return the uncompressed size in bytes of the blob at *filename*.

        A file starting with the gzip magic bytes reports the ISIZE field
        stored in its last four bytes. ISIZE is the uncompressed length
        modulo 2**32, so payloads of 4 GiB or more are under-reported.
        Any other file reports its size on disk.
        """
        with open(filename, 'rb') as f:
            if f.read(2) == b'\x1f\x8b':
                f.seek(-4, os.SEEK_END)
                # ISIZE is little-endian by specification; '<I' keeps the
                # result correct on big-endian hosts, where the native-order
                # 'I' format would byte-swap the value.
                return struct.unpack('<I', f.read(4))[0]
        return os.stat(filename).st_size

    @staticmethod
    def _load_layer(digest: str):
        """Measure the blob stored at <blobs_path>/sha256/<digest[:2]>/<digest>/data."""
        filename = blobs_path + "/sha256/%s/%s/data" % (digest[0:2], digest)
        return LayersRepo._get_uncompressed_size(filename)

    def get_layer_size(self, digest: str):
        """Return the size for *digest*, reading the blob only on first request."""
        if digest not in self.layers:
            self.layers[digest] = LayersRepo._load_layer(digest)
        return self.layers[digest]


layer_repo = LayersRepo()
class ManifestsRepo:
    """Memoising loader of image manifests, keyed by hex digest."""

    def __init__(self):
        # manifest digest -> list of [layer digest, compressed size, uncompressed size]
        self.manifests = {}

    @staticmethod
    def _load_manifest(_hash: str):
        """Parse the manifest blob for *_hash* and describe each of its layers.

        Returns a list of ``[digest, compressed_size, uncompressed_size]``
        lists, in manifest order. The compressed size is the ``size`` value
        recorded in the manifest; the uncompressed size comes from
        ``layer_repo``. Raises ``KeyError`` when the JSON document has no
        ``layers`` key.
        """
        filename = blobs_path + "/sha256/%s/%s/data" % (_hash[0:2], _hash)
        # Context manager so the handle is closed promptly rather than leaked.
        with open(filename, 'r') as f:
            layers_raw = json.load(f)['layers']
        layers = []
        for layer in layers_raw:
            # Drop the first seven characters of the digest field
            # (the length of a "sha256:" prefix), leaving the bare hex digest.
            digest = layer['digest'][7:]
            compressed_size = layer['size']
            uncompressed_size = layer_repo.get_layer_size(digest)
            layers.append([digest, compressed_size, uncompressed_size])
        return layers

    def get_info(self, _hash: str):
        """Return the layer list for manifest *_hash*, loading it on first request."""
        if _hash not in self.manifests:
            self.manifests[_hash] = ManifestsRepo._load_manifest(_hash)
        return self.manifests[_hash]


manifest_repo = ManifestsRepo()
class ImageRepo:
    """Registry of images keyed by manifest digest, with the tags pointing at each."""

    def __init__(self):
        # manifest digest -> {'tags': [...], 'layers': [...], 'layer_count': int}
        self.images = {}

    def register_image(self, repo: str, tag: str, digest: str):
        """Record that ``repo:tag`` refers to the manifest *digest*.

        The first registration of a digest loads its layer list through
        ``manifest_repo``; later registrations only append the extra tag name.
        """
        name = '%s:%s' % (repo, tag)
        if digest not in self.images:
            manifest = manifest_repo.get_info(digest)
            self.images[digest] = {
                'tags': [name],
                # 'compressed_size_mb': sum(map(lambda x: x[1], manifest)) / (1024 * 1024),
                # 'uncompressed_size_mb': sum(map(lambda x: x[2], manifest)) / (1024 * 1024),
                'layers': manifest,
                'layer_count': len(manifest),
            }
        else:
            self.images[digest]['tags'].append(name)

    def __str__(self):
        return str(self.images)

    def __repr__(self):
        return self.__str__()


image_repo = ImageRepo()
# Matches <repositories_path>/<repo>/_manifests/tags/<tag>/index/sha256/<digest>/link.
# The path prefix is escaped so that regex metacharacters in the user-supplied
# directory (e.g. '.', '+', '(') are matched literally.
tag_link_pattern = re.compile(
    re.escape(repositories_path) + '/(.*)/_manifests/tags/(.*)/index/sha256/(.*)/link')

repositories_files = glob.iglob(repositories_path + '/**/link', recursive=True)
manifests = [x for x in repositories_files
             if '_manifests' in x
             and 'sha256' in x
             and 'tags' in x
             and (len(sys.argv) < 3 or sys.argv[2] in x)]
for manifest in manifests:
    match = tag_link_pattern.match(manifest)
    if match is None:
        # The substring filter above is looser than the pattern (a repository
        # name may itself contain 'tags' or 'sha256'); skip such paths instead
        # of failing on None.group().
        continue
    image_repo.register_image(match.group(1), match.group(2), match.group(3))

pprint(image_repo.images)

# Deduplicate layers shared between images so each one is counted once.
layers = set()
for key in image_repo.images:
    for layer in image_repo.images[key]['layers']:
        layers.add(tuple(layer))
def get_layers_total_compressed_size_mb(layers):
    """Return the sum of element [1] of every entry in *layers*, in MiB.

    Each entry is a ``(digest, compressed_size, uncompressed_size)`` sequence
    with sizes in bytes.
    """
    return sum(layer[1] for layer in layers) / (1024 * 1024)
def get_layers_total_uncompressed_size_mb(layers):
    """Return the sum of element [2] of every entry in *layers*, in MiB.

    Each entry is a ``(digest, compressed_size, uncompressed_size)`` sequence
    with sizes in bytes.
    """
    return sum(layer[2] for layer in layers) / (1024 * 1024)
# Summary for the layers reachable from the tagged manifests collected above:
# uncompressed total first, then the compressed total recorded in the manifests.
print('Referenced size: %.2fMB (%.2fMB on disk) in %d layers' % (
    get_layers_total_uncompressed_size_mb(layers),
    get_layers_total_compressed_size_mb(layers),
    len(layers)))
# Matches <repositories_path>/<repo>/_layers/sha256/<digest>/link.
# The path prefix is escaped so regex metacharacters in it are matched literally.
layer_link_pattern = re.compile(
    re.escape(repositories_path) + '/.*/_layers/sha256/(.*)/link')

repositories_files = glob.iglob(repositories_path + '/**/link', recursive=True)
manifests = [x for x in repositories_files
             if '_layers' in x
             and (len(sys.argv) < 3 or sys.argv[2] in x)]
layers = set()
for manifest in manifests:
    match = layer_link_pattern.match(manifest)
    if match is None:
        # '_layers' may occur elsewhere in a path (e.g. inside a repository
        # name); skip links that are not layer links instead of crashing.
        continue
    layers.add(match.group(1))

# The message reports the size "on disk", so sum the actual blob file sizes.
# (layer_repo.get_layer_size() returns the *uncompressed* size for gzip blobs,
# which would not match the label.)
print('Total size: %.2fMB on disk in %d layers' % (
    sum(os.path.getsize(blobs_path + "/sha256/%s/%s/data" % (digest[0:2], digest))
        for digest in layers) / (1024 * 1024),
    len(layers)))
# | |
# tags = { | |
# '__layers': set(), | |
# '__versions': set(), | |
# '__compressed_size': 0, | |
# '__uncompressed_size': 0 | |
# } | |
# for key in image_repo.images: | |
# for tag in image_repo.images[key]['tags']: | |
# nested_tags = (tag.split(':')[0]).split('/') | |
# curr = tags | |
# for nested_tag in nested_tags: | |
# if not curr.__contains__(nested_tag): | |
# curr[nested_tag] = { | |
# '__layers': set(), | |
# '__versions': set(), | |
# '__compressed_size': 0, | |
# '__uncompressed_size': 0 | |
# } | |
# curr = curr[nested_tag] | |
# | |
# for layer in image_repo.images[key]['layers']: | |
# curr['__layers'].add(layer) | |
# curr['__versions'] += [tag.split(':')[1]] | |
# | |
# # pprint(tags) | |
# | |
# | |
# def calculate_size(tags: map, filter: ): | |
# compressed_size = 0 | |
# uncompressed_size = 0 | |
# | |
# for subtag in tags: | |
# if not subtag.startswith('__'): | |
# compressed_size, uncompressed_size = calculate_size(tags[subtag]) | |
# | |
# if tags['__layers'].__len__ != 0: | |
# for digest in tags['__images']: | |
# layers = image_repo.images[digest]['layers'] | |
# compressed_size += get_layers_total_compressed_size_mb(layers) | |
# uncompressed_size += get_layers_total_uncompressed_size_mb(layers) | |
# | |
# tags['__compressed_size'] = compressed_size | |
# tags['__uncompressed_size'] = uncompressed_size | |
# | |
# return tuple([compressed_size, uncompressed_size]) | |
# | |
# | |
# print(calculate_size(tags)) | |
# | |
# pprint(tags) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment