Additional HDF5 dataset chunk statistics
import argparse
import math
import operator
from collections import defaultdict
from dataclasses import dataclass
from functools import partial, reduce
from typing import Union

import h5py
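
# Requires a recent h5py (at least version 3.9 is recommended) for
# h5py.h5d.DatasetID.chunk_iter().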

@dataclass(slots=True, frozen=True)
class ChunkStats:
    """Various chunk statistics for one HDF5 dataset."""
    name: str
    num_stored: int
    size: int
    stor_size: int
    extent_ratio: float
    page_bins: dict
    page_spread_anomaly: int

    def __post_init__(self):
        if self.extent_ratio > 1:
            raise ValueError(f'Chunk shape ratio greater than 1 for {self.name}')
        if self.page_spread_anomaly < 0:
            raise ValueError(f'Chunk file page spread anomaly negative for {self.name}')


def get_cli_args():
    parser = argparse.ArgumentParser(
        description='Report collective dataset chunk statistics that h5stat does not provide.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('h5file', help='Input HDF5 file name.', type=str)
    parser.add_argument('--show', help='Print individual dataset stats',
                        action='store_true')
    parser.add_argument('--json', help='Format individual dataset stats in JSON',
                        action='store_true')
    return parser.parse_args()


def chunk_to_shape_ratio(chunk: tuple, shape: tuple) -> float:
    """Ratio of chunk to dataset shape extent."""
    ratio = 1
    for c, s in zip(chunk, shape):
        try:
            ratio *= min(1, c / s)
        except ZeroDivisionError:
            # Skip zero-sized dataset dimensions (no data in that dimension).
            continue
    return ratio
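
# Worked example for chunk_to_shape_ratio: a (100, 100) dataset chunked as
# (10, 10) has an extent ratio of (10/100) * (10/100) = 0.01, i.e. each chunk
# covers 1% of the dataset.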


def chunk2page(dset: h5py.Dataset, page_size: int) -> dict:
    """Determine file page for each chunk.

    Only for files with "PAGE" file space strategy.
    """
    def chunk_info(chunk_stor, stinfo):
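        # File pages are numbered starting from 1. A stored chunk is assumed
        # to fit entirely within one file page; anything else is an error.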
        start_page = math.floor(chunk_stor.byte_offset / page_size) + 1
        end_page = math.floor((chunk_stor.byte_offset + chunk_stor.size - 1) / page_size) + 1
        if start_page != end_page:
            raise ValueError(f'Chunk crosses file page boundary: {chunk_stor}')
        stinfo[start_page] += 1

    stinfo = defaultdict(int)
    dset.id.chunk_iter(partial(chunk_info, stinfo=stinfo))
    return stinfo


def dset_stats(
    name: str,
    h5obj: Union[h5py.Group, h5py.Dataset],
    dset_list: list[ChunkStats],
    page_size: int = 0
) -> None:
    if isinstance(h5obj, h5py.Dataset):
        chunk_shape = h5obj.chunks
        if chunk_shape:
            chunk_nelem = reduce(operator.mul, chunk_shape, 1)
            if page_size:
                chunk_page = chunk2page(h5obj, page_size)
                num_chunks = reduce(operator.add, chunk_page.values(), 0)
                stored_size = h5obj.id.get_storage_size()
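                # Anomaly: file pages actually holding chunks minus the
                # minimum number of pages that could hold the stored bytes.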
                page_spread = len(chunk_page) - math.ceil(stored_size / page_size)
            else:
                num_chunks = h5obj.id.get_num_chunks()
                stored_size = h5obj.id.get_storage_size()
                chunk_page = dict()
                page_spread = 0
            dset_list.append(ChunkStats(
                name=h5obj.name,
                num_stored=num_chunks,
                extent_ratio=chunk_to_shape_ratio(chunk_shape, h5obj.shape),
                stor_size=stored_size,
                size=h5obj.id.get_type().get_size() * chunk_nelem,
                page_bins=chunk_page,
                page_spread_anomaly=page_spread
            ))


cli = get_cli_args()

num_chunks = [
    {'count': 0, 'text': '# of datasets with no chunks'},
    {'count': 0, 'text': '# of datasets with 1 chunk'},
    {'count': 0, 'text': '# of datasets with 2 - 10 chunks'},
    {'count': 0, 'text': '# of datasets with 11 - 100 chunks'},
    {'count': 0, 'text': '# of datasets with 101 - 1000 chunks'},
    {'count': 0, 'text': '# of datasets with 1001 - 10_000 chunks'},
    {'count': 0, 'text': '# of datasets with 10_001 - 100_000 chunks'},
    {'count': 0, 'text': '# of datasets with > 100_000 chunks'},
]

chunk_size = [
    {'count': 0, 'text': '# of datasets with chunk sizes 0 - 1000 bytes'},
    {'count': 0, 'text': '# of datasets with chunk sizes 1001 - 10_000 bytes'},
    {'count': 0, 'text': '# of datasets with chunk sizes 10_001 - 100_000 bytes'},
    {'count': 0, 'text': '# of datasets with chunk sizes 100_001 - 1_000_000 bytes'},
    {'count': 0, 'text': '# of datasets with chunk sizes 1_000_001 - 10_000_000 bytes'},
    {'count': 0, 'text': '# of datasets with chunk sizes > 10_000_000 bytes'},
]

chunk_ext = [
    {'count': 0, 'text': '# of datasets with ratio <= 0.1%', 'min_val': 0, 'max_val': 0.001},
    {'count': 0, 'text': '# of datasets with 0.1% < ratio <= 0.2%', 'min_val': 0.001, 'max_val': 0.002},
    {'count': 0, 'text': '# of datasets with 0.2% < ratio <= 0.3%', 'min_val': 0.002, 'max_val': 0.003},
    {'count': 0, 'text': '# of datasets with 0.3% < ratio <= 0.4%', 'min_val': 0.003, 'max_val': 0.004},
    {'count': 0, 'text': '# of datasets with 0.4% < ratio <= 0.5%', 'min_val': 0.004, 'max_val': 0.005},
    {'count': 0, 'text': '# of datasets with 0.5% < ratio <= 1%', 'min_val': 0.005, 'max_val': 0.01},
    {'count': 0, 'text': '# of datasets with 1% < ratio <= 2%', 'min_val': 0.01, 'max_val': 0.02},
    {'count': 0, 'text': '# of datasets with 2% < ratio <= 3%', 'min_val': 0.02, 'max_val': 0.03},
    {'count': 0, 'text': '# of datasets with 3% < ratio <= 4%', 'min_val': 0.03, 'max_val': 0.04},
    {'count': 0, 'text': '# of datasets with 4% < ratio <= 5%', 'min_val': 0.04, 'max_val': 0.05},
    {'count': 0, 'text': '# of datasets with 5% < ratio <= 10%', 'min_val': 0.05, 'max_val': 0.1},
    {'count': 0, 'text': '# of datasets with 10% < ratio <= 25%', 'min_val': 0.1, 'max_val': 0.25},
    {'count': 0, 'text': '# of datasets with ratio > 25%', 'min_val': 0.25, 'max_val': 1},
]

chunk_pages = [
    {'count': 0, 'text': '# of datasets with chunks in 1 file page'},
    {'count': 0, 'text': '# of datasets with chunks in 2 file pages'},
    {'count': 0, 'text': '# of datasets with chunks in 3 file pages'},
    {'count': 0, 'text': '# of datasets with chunks in 4 file pages'},
    {'count': 0, 'text': '# of datasets with chunks in 5 file pages'},
    {'count': 0, 'text': '# of datasets with chunks in >5 file pages'},
]

page_anomaly = [
    {'count': 0, 'text': '# of datasets entirely in 1 file page'},
    {'count': 0, 'text': '# of datasets in 1 extra file page'},
    {'count': 0, 'text': '# of datasets in 2 extra file pages'},
    {'count': 0, 'text': '# of datasets in 3 extra file pages'},
    {'count': 0, 'text': '# of datasets in >3 extra file pages'},
]
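
# Largest share of a dataset's stored chunks found in any single file page.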
most_page = [
    {'count': 0, 'text': '# of datasets in 0 - 10%'},
    {'count': 0, 'text': '# of datasets in 10 - 20%'},
    {'count': 0, 'text': '# of datasets in 20 - 30%'},
    {'count': 0, 'text': '# of datasets in 30 - 40%'},
    {'count': 0, 'text': '# of datasets in 40 - 50%'},
    {'count': 0, 'text': '# of datasets in 50 - 60%'},
    {'count': 0, 'text': '# of datasets in 60 - 70%'},
    {'count': 0, 'text': '# of datasets in 70 - 80%'},
    {'count': 0, 'text': '# of datasets in 80 - 90%'},
    {'count': 0, 'text': '# of datasets in 90 - 100%'},
]

dset_info = list()
with h5py.File(cli.h5file, mode='r') as f:
    fcpl = f.id.get_create_plist()
    page = fcpl.get_file_space_strategy()[0] == h5py.h5f.FSPACE_STRATEGY_PAGE
    if page:
        page_size = fcpl.get_file_space_page_size()
    else:
        f.visititems(partial(dset_stats, dset_list=dset_info, page_size=0))
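
# Paged files are reopened with a page buffer of four file pages for faster
# chunk iteration.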
if page and page_size:
    with h5py.File(cli.h5file, mode='r', page_buf_size=4 * page_size) as f:
        f.visititems(partial(dset_stats, dset_list=dset_info, page_size=page_size))

if cli.show:
    if cli.json:
        print('[')
        flag = False
        for info in sorted(dset_info, key=lambda d: d.name):
            if flag:
                print(',')
            else:
                flag = True
            if page:
                print(f'{{"dataset":"{info.name}","stored_size":{info.stor_size},"chunks_stored":{info.num_stored},'
                      f'"chunk_size":{info.size},"chunk_shape_ratio":{info.extent_ratio:.6g},'
                      f'"file_pages":{len(info.page_bins)},"page_spread_anomaly":{info.page_spread_anomaly}}}', end='')
            else:
                print(f'{{"dataset":"{info.name}","stored_size":{info.stor_size},"chunks_stored":{info.num_stored},'
                      f'"chunk_size":{info.size},"chunk_shape_ratio":{info.extent_ratio:.6g}}}', end='')
        print('\n]')
    else:
        for info in sorted(dset_info, key=lambda d: d.name):
            if page:
                print(f'dataset {info.name} stored_size={info.stor_size} chunks_stored={info.num_stored}'
                      f' chunk_size={info.size} chunk_shape_ratio={info.extent_ratio:.6g}'
                      f' file_pages={len(info.page_bins)} page_spread_anomaly={info.page_spread_anomaly}')
            else:
                print(f'dataset {info.name} stored_size={info.stor_size} chunks_stored={info.num_stored}'
                      f' chunk_size={info.size} chunk_shape_ratio={info.extent_ratio:.6g}')
    raise SystemExit()
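
# Bin each dataset by its number of stored chunks and its nominal chunk size
# using the order of magnitude: math.log10 raises ValueError for zero chunks
# (the first bin) and counts beyond the last bin land there via IndexError.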
for ds_info in dset_info:
    try:
        num_chunks[math.ceil(math.log10(ds_info.num_stored)) + 1]['count'] += 1
    except IndexError:
        num_chunks[-1]['count'] += 1
    except ValueError:
        num_chunks[0]['count'] += 1
    try:
        chunk_size[max(3, math.ceil(math.log10(ds_info.size))) - 3]['count'] += 1
    except IndexError:
        chunk_size[-1]['count'] += 1
    for ce in chunk_ext:
        if ce['min_val'] < ds_info.extent_ratio <= ce['max_val']:
            ce['count'] += 1
            break

if page:
    for ds_info in dset_info:
        if ds_info.num_stored == 0:
            # No stored chunks means no file pages to attribute them to.
            continue
        try:
            chunk_pages[len(ds_info.page_bins) - 1]['count'] += 1
        except IndexError:
            chunk_pages[-1]['count'] += 1
        try:
            page_anomaly[ds_info.page_spread_anomaly]['count'] += 1
        except IndexError:
            page_anomaly[-1]['count'] += 1
        max_prcnt = max(cnt / ds_info.num_stored for cnt in ds_info.page_bins.values())
        try:
            most_page[math.floor(max_prcnt * 10)]['count'] += 1
        except IndexError:
            most_page[-1]['count'] += 1

chunked_dsets = len(dset_info)
for stats in (num_chunks, chunk_size, chunk_ext):
    if sum(s['count'] for s in stats) != chunked_dsets:
        raise ValueError('Sanity check failed: bin counts do not sum to the number of chunked datasets')

print(f'Dataset chunk statistics for {cli.h5file}:')
if page:
    print(f'"PAGE" file space strategy with page size of {page_size:,} bytes.')
print(f'\tTotal chunked datasets: {chunked_dsets}')
print('\tChunk bins:')
for stat in num_chunks:
    print(f"\t\t{stat['text']}: {stat['count']}")
print('\tChunk size bins:')
for stat in chunk_size:
    print(f"\t\t{stat['text']}: {stat['count']}")
print('\tChunk to dataset shape ratio bins:')
for stat in chunk_ext:
    print(f"\t\t{stat['text']}: {stat['count']}")
if page:
    print('\tChunk file pages bins:')
    for stat in chunk_pages:
        print(f"\t\t{stat['text']}: {stat['count']}")
    print('\tChunk file page spread anomaly (extra file pages based on total dataset storage size):')
    for stat in page_anomaly:
        print(f"\t\t{stat['text']}: {stat['count']}")
    print('\tMaximum percentage of chunks in a single file page:')
    for stat in most_page:
        print(f"\t\t{stat['text']}: {stat['count']}")

Updated with three new stats about dataset chunks in files with the PAGE file space strategy.
Added JSON format output and a few bug fixes.
Fixed the JSON output to be compliant.

Requires a recent h5py; at least version 3.9 is recommended. Run the script with --help to see the available options.
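
For example (a minimal sketch; the file and script names below are made up for illustration), a small test file with the PAGE file space strategy can be created with h5py and then fed to the script:

import h5py
import numpy as np

# Create a file with the "PAGE" file space strategy and 4 KiB file pages.
with h5py.File('test_paged.h5', 'w', fs_strategy='page', fs_page_size=4096) as f:
    # One chunked dataset: 100 chunks of 10 x 10 elements each.
    f.create_dataset('data', data=np.arange(10_000).reshape(100, 100),
                     chunks=(10, 10))

# Then run, e.g.: python chunk_stats.py test_paged.h5 --show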