@rsotnychenko
Created January 4, 2020 09:44
Calculate the size of a Docker registry v2 repo (dirty)
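Usage (a sketch; the script only reads sys.argv, so the file name registry_size.py and the storage path below are assumptions; with the default filesystem storage driver the v2 tree typically sits under /var/lib/registry/docker/registry):

    python3 registry_size.py /var/lib/registry/docker/registry             # whole registry
    python3 registry_size.py /var/lib/registry/docker/registry my/repo     # only paths containing "my/repo"

The first argument is the directory that contains the registry's v2/ tree; the optional second argument is a substring filter applied to the repository link paths.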
import glob
import re
import json
from pprint import pprint
import os
import struct
import sys
# Registry storage root passed on the command line; the 'v2' directory
# (with repositories/ and blobs/) is expected directly underneath it.
base_path = sys.argv[1] + '/v2'
repositories_path = base_path + '/repositories'
blobs_path = base_path + '/blobs'
class LayersRepo:
    """Caches uncompressed layer sizes keyed by layer digest."""

    def __init__(self):
        self.layers = {}

    @staticmethod
    def _get_uncompressed_size(filename):
        with open(filename, 'rb') as f:
            # gzip blobs start with the magic bytes 1f 8b; their last four bytes
            # hold ISIZE, the uncompressed size modulo 2**32, stored little-endian.
            if f.read(2) == b'\x1f\x8b':
                f.seek(-4, 2)
                return struct.unpack('<I', f.read(4))[0]
            else:
                return os.stat(filename).st_size

    @staticmethod
    def _load_layer(digest: str):
        filename = blobs_path + "/sha256/%s/%s/data" % (digest[0:2], digest)
        return LayersRepo._get_uncompressed_size(filename)

    def get_layer_size(self, digest: str):
        if digest not in self.layers:
            self.layers[digest] = LayersRepo._load_layer(digest)
        return self.layers[digest]


layer_repo = LayersRepo()
class ManifestsRepo:
    """Caches parsed manifests as [digest, compressed size, uncompressed size] per layer."""

    def __init__(self):
        self.manifests = {}

    @staticmethod
    def _load_manifest(_hash: str):
        filename = blobs_path + "/sha256/%s/%s/data" % (_hash[0:2], _hash)
        with open(filename, 'r') as f:
            layers_raw = json.load(f)['layers']
        layers = []
        for layer in layers_raw:
            digest = layer['digest'][7:]  # strip the leading "sha256:" prefix
            compressed_size = layer['size']
            uncompressed_size = layer_repo.get_layer_size(digest)
            layers.append([digest, compressed_size, uncompressed_size])
        return layers

    def get_info(self, _hash: str):
        if _hash not in self.manifests:
            self.manifests[_hash] = ManifestsRepo._load_manifest(_hash)
        return self.manifests[_hash]


manifest_repo = ManifestsRepo()
class ImageRepo:
    """Groups tags by manifest digest so multi-tagged images are counted only once."""

    def __init__(self):
        self.images = {}

    def register_image(self, repo: str, tag: str, digest: str):
        name = '%s:%s' % (repo, tag)
        if digest not in self.images:
            manifest = manifest_repo.get_info(digest)
            self.images[digest] = {
                'tags': [name],
                # 'compressed_size_mb': sum(x[1] for x in manifest) / (1024 * 1024),
                # 'uncompressed_size_mb': sum(x[2] for x in manifest) / (1024 * 1024),
                'layers': manifest,
                'layer_count': len(manifest),
            }
        else:
            self.images[digest]['tags'].append(name)

    def __str__(self):
        return str(self.images)

    def __repr__(self):
        return self.__str__()


image_repo = ImageRepo()
# First pass: walk every tag link under _manifests/tags and register the tagged images.
repositories_files = glob.iglob(repositories_path + '/**/link', recursive=True)
manifests = [x for x in repositories_files
             if '_manifests' in x
             and 'sha256' in x
             and 'tags' in x
             and (len(sys.argv) < 3 or sys.argv[2] in x)]

for manifest in manifests:
    match = re.match(repositories_path + '/(.*)/_manifests/tags/(.*)/index/sha256/(.*)/link', manifest)
    image_repo.register_image(match.group(1), match.group(2), match.group(3))

pprint(image_repo.images)
# Deduplicate layers across all referenced images before summing sizes.
layers = set()
for key in image_repo.images:
    for layer in image_repo.images[key]['layers']:
        layers.add(tuple(layer))


def get_layers_total_compressed_size_mb(layers):
    return sum(x[1] for x in layers) / (1024 * 1024)


def get_layers_total_uncompressed_size_mb(layers):
    return sum(x[2] for x in layers) / (1024 * 1024)


print('Referenced size: %.2fMB (%.2fMB on disk) in %d layers' % (
    get_layers_total_uncompressed_size_mb(layers),
    get_layers_total_compressed_size_mb(layers),
    len(layers)))
# Second pass: count every layer linked under _layers, whether or not a tag still references it.
repositories_files = glob.iglob(repositories_path + '/**/link', recursive=True)
layer_links = [x for x in repositories_files
               if '_layers' in x
               and (len(sys.argv) < 3 or sys.argv[2] in x)]

layers = set()
for link in layer_links:
    match = re.match(repositories_path + '/.*/_layers/sha256/(.*)/link', link)
    layers.add(match.group(1))

print('Total size: %.2fMB on disk in %d layers' % (
    sum(layer_repo.get_layer_size(digest) for digest in layers) / (1024 * 1024),
    len(layers)))
# Unfinished work in progress (left commented out): per-repository aggregation of
# sizes over the nested tag hierarchy.
#
# tags = {
#     '__layers': set(),
#     '__versions': set(),
#     '__compressed_size': 0,
#     '__uncompressed_size': 0
# }
# for key in image_repo.images:
#     for tag in image_repo.images[key]['tags']:
#         nested_tags = (tag.split(':')[0]).split('/')
#         curr = tags
#         for nested_tag in nested_tags:
#             if nested_tag not in curr:
#                 curr[nested_tag] = {
#                     '__layers': set(),
#                     '__versions': set(),
#                     '__compressed_size': 0,
#                     '__uncompressed_size': 0
#                 }
#             curr = curr[nested_tag]
#
#         for layer in image_repo.images[key]['layers']:
#             curr['__layers'].add(layer)
#         curr['__versions'] += [tag.split(':')[1]]
#
# # pprint(tags)
#
#
# def calculate_size(tags: map, filter: ):
#     compressed_size = 0
#     uncompressed_size = 0
#
#     for subtag in tags:
#         if not subtag.startswith('__'):
#             compressed_size, uncompressed_size = calculate_size(tags[subtag])
#
#     if tags['__layers'].__len__ != 0:
#         for digest in tags['__images']:
#             layers = image_repo.images[digest]['layers']
#             compressed_size += get_layers_total_compressed_size_mb(layers)
#             uncompressed_size += get_layers_total_uncompressed_size_mb(layers)
#
#     tags['__compressed_size'] = compressed_size
#     tags['__uncompressed_size'] = uncompressed_size
#
#     return tuple([compressed_size, uncompressed_size])
#
#
# print(calculate_size(tags))
#
# pprint(tags)