Skip to content

Instantly share code, notes, and snippets.

@grzanka
Created February 22, 2024 13:35
Show Gist options
  • Save grzanka/7e52835a303aff76e737ad3399703add to your computer and use it in GitHub Desktop.
Ingredients to reproduce the problem reported in https://github.com/scikit-hep/uproot5/discussions/1135
from pathlib import Path
import uproot
import h5py
import click
def peak_count(fpeak, channel_no: int, peak_type: str) -> int:
    """Return the total number of peaks of *peak_type* stored for one channel.

    Walks every trc-file/segment group under ``channel_{channel_no}`` in the
    open HDF5 file *fpeak* and sums the lengths of the ``peak_index``
    datasets. Groups without a ``peak_index`` dataset contribute zero.

    :param fpeak: open h5py File with ``no_of_trc_files`` / ``no_of_segments``
        attributes (assumed layout — confirm against the producer).
    :param channel_no: channel index used in the group path.
    :param peak_type: ``'positive'`` or ``'negative'``.
    :return: total peak count across all trc files and segments.
    """
    n_files = fpeak.attrs["no_of_trc_files"]
    n_segments = fpeak.attrs["no_of_segments"]
    count = 0
    for file_idx in range(n_files):
        for seg_idx in range(n_segments):
            dset_path = (
                f"channel_{channel_no}/trc_file_{file_idx:03d}"
                f"/segment_{seg_idx:03d}/{peak_type}/peak_index"
            )
            try:
                count += fpeak[dset_path].shape[0]
            except KeyError:
                # Dataset absent for this file/segment — nothing to add.
                continue
    return count
@click.command()
@click.argument("hdf_file_path", type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, path_type=Path))
@click.argument("root_file_path", type=click.Path(path_type=Path))
def convert(hdf_file_path: Path, root_file_path: Path):
    """Convert peak data from an HDF5 file into a ROOT file.

    For each of 4 channels and each peak polarity, writes one TTree
    (``channel_N/positive`` / ``channel_N/negative``) in buckets of
    ``dict_entries_per_basket`` entries, adding calibrated branches derived
    from the per-channel gain/offset/sampling-interval attributes.

    HDF_FILE_PATH: existing, readable HDF5 input file.
    ROOT_FILE_PATH: output ROOT file; any pre-existing file is replaced.
    """
    # Start from a clean slate so uproot.recreate never sees a stale file.
    root_file_path.unlink(missing_ok=True)
    dict_entries_per_basket = 1000_000  # set to 100_000 for problematic files
    # BUGFIX: initialise before the loops. Previously `saving_ok` was only
    # assigned in the except-branch, so a fully successful conversion raised
    # UnboundLocalError at the final `if not saving_ok:` check.
    saving_ok = True
    with h5py.File(hdf_file_path, 'r') as f, uproot.recreate(root_file_path) as fout:
        for channel_no in range(4):
            print(f"Processing channel {channel_no}")
            # Per-channel calibration constants stored as HDF5 attributes.
            gain_mV = f[f'channel_{channel_no}'].attrs['gain_mV']
            offset_mV = f[f'channel_{channel_no}'].attrs['offset_mV']
            horiz_interval_ns = f[f'channel_{channel_no}'].attrs['horiz_interval_ns']
            # Persist the calibration constants as strings alongside the trees.
            fout[f'channel_{channel_no}/gain_mV'] = str(gain_mV)
            fout[f'channel_{channel_no}/offset_mV'] = str(offset_mV)
            fout[f'channel_{channel_no}/horiz_interval_ns'] = str(horiz_interval_ns)
            for peak_type in ['positive', 'negative']:
                print(f"Processing {peak_type} peaks")
                total_number_of_peaks = peak_count(f, channel_no, peak_type)
                # Stream the data in fixed-size buckets so each TTree extend
                # (basket) stays at a bounded memory footprint.
                for i in range(0, total_number_of_peaks, dict_entries_per_basket):
                    dict_bucket = {}
                    for name, dataset in f[f'channel_{channel_no}/{peak_type}'].items():
                        dict_bucket[name] = dataset[i:i + dict_entries_per_basket]
                    # Derived branches: convert raw sample/ADC units to
                    # physical units using the channel calibration.
                    dict_bucket['peak_value_mV'] = dict_bucket['peak_value'] * gain_mV
                    dict_bucket['peak_length_ns'] = dict_bucket['peak_length'] * horiz_interval_ns
                    dict_bucket['peak_start_us'] = dict_bucket['peak_start'] * horiz_interval_ns / 1000
                    dict_bucket['peak_cfd_us'] = dict_bucket['peak_cfd_index'] * horiz_interval_ns / 1000
                    dict_bucket['peak_rise_ns'] = dict_bucket['rise_time'] * horiz_interval_ns
                    dict_bucket['peak_area_ns_mV'] = dict_bucket['peak_area'] * horiz_interval_ns * gain_mV
                    dict_bucket['peak_baseline_mV'] = dict_bucket['peak_baseline'] * gain_mV - offset_mV
                    dict_bucket['peak_noise_mV'] = dict_bucket['peak_noise'] * gain_mV
                    dict_bucket['peak_fwhm_ns'] = dict_bucket['peak_fwhm'] * horiz_interval_ns
                    try:
                        ttree_name = f'channel_{channel_no}/{peak_type}'
                        # First bucket creates the TTree; later buckets extend it.
                        if i == 0:
                            fout[ttree_name] = dict_bucket
                        else:
                            fout[ttree_name].extend(dict_bucket)
                        # Diagnostics: raw (uncompressed) bucket size and
                        # per-entry byte count across all branches.
                        basket_size = 0
                        entry_size = 0
                        for value in dict_bucket.values():
                            basket_size += value.nbytes
                            entry_size += value.dtype.itemsize
                        dict_entries = dict_bucket['peak_value_mV'].shape[0]
                        print(f"\textending each of {len(dict_bucket)} branch of TTree {ttree_name} with bucket no {i//dict_entries_per_basket:d}")
                        print(f"\tadding {dict_entries} entries (each entry has {entry_size} bytes, in total {basket_size/1024/1024:.2f} MB)")
                    except Exception as e:
                        # Best-effort: report, mark the output as corrupt and
                        # move on to the next peak type / channel.
                        print(f"Error {e} while writing {i} to {i + dict_entries_per_basket}")
                        saving_ok = False
                        break
    if not saving_ok:
        print(f"Generated file with {root_file_path.stat().st_size} bytes")
        print(f"Removing {root_file_path}, file corrupted")
        root_file_path.unlink()


if __name__ == "__main__":
    convert()
Processing channel 0
Processing positive peaks
extending each of 23 branch of TTree channel_0/positive with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 6
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 7
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/positive with bucket no 8
adding 507702 entries (each entry has 152 bytes, in total 73.60 MB)
Processing negative peaks
extending each of 23 branch of TTree channel_0/negative with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_0/negative with bucket no 6
adding 433912 entries (each entry has 152 bytes, in total 62.90 MB)
Processing channel 1
Processing positive peaks
extending each of 23 branch of TTree channel_1/positive with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 6
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 7
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 8
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/positive with bucket no 9
adding 345506 entries (each entry has 152 bytes, in total 50.08 MB)
Processing negative peaks
extending each of 23 branch of TTree channel_1/negative with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_1/negative with bucket no 6
adding 699947 entries (each entry has 152 bytes, in total 101.46 MB)
Processing channel 2
Processing positive peaks
extending each of 23 branch of TTree channel_2/positive with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 6
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 7
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 8
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/positive with bucket no 9
adding 378574 entries (each entry has 152 bytes, in total 54.88 MB)
Processing negative peaks
extending each of 23 branch of TTree channel_2/negative with bucket no 0
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 1
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 2
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 3
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 4
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 5
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB)
extending each of 23 branch of TTree channel_2/negative with bucket no 6
adding 985117 entries (each entry has 152 bytes, in total 142.80 MB)
Processing channel 3
Processing positive peaks
Error 'i' format requires -2147483648 <= number <= 2147483647 while writing 0 to 1000000
Processing negative peaks
Error 'i' format requires -2147483648 <= number <= 2147483647 while writing 0 to 1000000
Generated file with 3964309449 bytes
Removing 4nA.slim.root, file corrupted
[ares][plgkongruencj@ac0015 disc1135]$ wget https://s3p.cloud.cyfronet.pl/datarawlv2v4/20231204m2/4nA.slim.hdf
--2024-02-22 14:26:25-- https://s3p.cloud.cyfronet.pl/datarawlv2v4/20231204m2/4nA.slim.hdf
Resolving s3p.cloud.cyfronet.pl (s3p.cloud.cyfronet.pl)... 149.156.176.202, 149.156.176.201, 149.156.176.200
Connecting to s3p.cloud.cyfronet.pl (s3p.cloud.cyfronet.pl)|149.156.176.202|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4321335884 (4.0G) [application/x-hdf]
Saving to: ‘4nA.slim.hdf’
4nA.slim.hdf 100%[===========================================================================>] 4.02G 319MB/s in 14s
2024-02-22 14:26:38 (304 MB/s) - ‘4nA.slim.hdf’ saved [4321335884/4321335884]
[ares][plgkongruencj@ac0015 disc1135]$ python -m venv venv
[ares][plgkongruencj@ac0015 disc1135]$ source venv/bin/activate
(venv) [ares][plgkongruencj@ac0015 disc1135]$ pip install uproot h5py click
Collecting uproot
Using cached uproot-5.2.2-py3-none-any.whl (346 kB)
Collecting h5py
Using cached h5py-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)
Collecting click
Downloading click-8.1.7-py3-none-any.whl (97 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 97.9/97.9 kB 2.3 MB/s eta 0:00:00
Collecting awkward>=2.4.6
Using cached awkward-2.6.1-py3-none-any.whl (749 kB)
Collecting fsspec
Using cached fsspec-2024.2.0-py3-none-any.whl (170 kB)
Collecting numpy
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Collecting packaging
Using cached packaging-23.2-py3-none-any.whl (53 kB)
Collecting awkward-cpp==29
Using cached awkward_cpp-29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (706 kB)
Collecting importlib-metadata>=4.13.0
Using cached importlib_metadata-7.0.1-py3-none-any.whl (23 kB)
Collecting zipp>=0.5
Using cached zipp-3.17.0-py3-none-any.whl (7.4 kB)
Installing collected packages: zipp, packaging, numpy, fsspec, click, importlib-metadata, h5py, awkward-cpp, awkward, uproot
Successfully installed awkward-2.6.1 awkward-cpp-29 click-8.1.7 fsspec-2024.2.0 h5py-3.10.0 importlib-metadata-7.0.1 numpy-1.26.4 packaging-23.2 uproot-5.2.2 zipp-3.17.0
[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: pip install --upgrade pip
(venv) [ares][plgkongruencj@ac0015 disc1135]$ python discussion1135.py 4nA.slim.hdf 4nA.slim.root 1>stdout.log 2>stderr.log
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment