-
-
Save grzanka/7e52835a303aff76e737ad3399703add to your computer and use it in GitHub Desktop.
Ingredients to reproduce the problem discussed in https://github.com/scikit-hep/uproot5/discussions/1135
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import uproot | |
import h5py | |
import click | |
def peak_count(fpeak, channel_no: int, peak_type: str) -> int:
    """Count the peaks stored for one channel and one peak type.

    Visits every (trc file, segment) pair announced by the ``no_of_trc_files``
    and ``no_of_segments`` entries of ``fpeak.attrs`` and sums the length of
    the first axis of each ``peak_index`` dataset that exists.

    Args:
        fpeak: Object exposing ``attrs`` and item lookup by dataset path
            (the script passes an open ``h5py.File``).
        channel_no: Channel number used in the ``channel_<n>`` path prefix.
        peak_type: Name of the sub-group holding the peaks (the script passes
            ``"positive"`` or ``"negative"``).

    Returns:
        Total number of entries over all ``peak_index`` datasets found.
    """
    total_peaks = 0
    for trc_file_no in range(fpeak.attrs["no_of_trc_files"]):
        for segment_no in range(fpeak.attrs["no_of_segments"]):
            dset_name = f"channel_{channel_no}/trc_file_{trc_file_no:03d}/segment_{segment_no:03d}/{peak_type}/peak_index"
            try:
                total_peaks += fpeak[dset_name].shape[0]
            except KeyError:
                # A (file, segment) pair without this dataset simply
                # contributes no peaks; this is deliberate best-effort.
                continue
    return total_peaks
@click.command()
@click.argument("hdf_file_path", type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, path_type=Path))
@click.argument("root_file_path", type=click.Path(path_type=Path))
def convert(hdf_file_path: Path, root_file_path: Path):
    """Convert peak data from an HDF5 file into TTrees of a ROOT file.

    For each of the four channels the attributes ``gain_mV``, ``offset_mV``
    and ``horiz_interval_ns`` are stored as strings in the output file. Then,
    for both peak types, every dataset under ``channel_<n>/<peak_type>`` is
    read in chunks of ``dict_entries_per_basket`` entries, a set of scaled
    branches is derived, and the chunk is written to (first chunk) or
    appended to (later chunks) the TTree ``channel_<n>/<peak_type>``.

    If any write fails, the output file is reported as corrupted and removed.

    Args:
        hdf_file_path: Existing input HDF5 file.
        root_file_path: Output ROOT file; an existing file is replaced.
    """
    root_file_path.unlink(missing_ok=True)
    dict_entries_per_basket = 1000_000  # set to 100_000 for problematic files

    # Must start as True: it is only ever set to False in the error handler,
    # so without this a fully successful run ends in a NameError.
    saving_ok = True

    with h5py.File(hdf_file_path, 'r') as f, uproot.recreate(root_file_path) as fout:
        for channel_no in range(4):
            print(f"Processing channel {channel_no}")
            channel_attrs = f[f'channel_{channel_no}'].attrs
            gain_mV = channel_attrs['gain_mV']
            offset_mV = channel_attrs['offset_mV']
            horiz_interval_ns = channel_attrs['horiz_interval_ns']
            fout[f'channel_{channel_no}/gain_mV'] = str(gain_mV)
            fout[f'channel_{channel_no}/offset_mV'] = str(offset_mV)
            fout[f'channel_{channel_no}/horiz_interval_ns'] = str(horiz_interval_ns)

            for peak_type in ['positive', 'negative']:
                print(f"Processing {peak_type} peaks")
                ttree_name = f'channel_{channel_no}/{peak_type}'
                total_number_of_peaks = peak_count(f, channel_no, peak_type)

                for i in range(0, total_number_of_peaks, dict_entries_per_basket):
                    # Load one chunk of every dataset of this channel/peak type.
                    dict_bucket = {}
                    for name, dataset in f[ttree_name].items():
                        dict_bucket[name] = dataset[i:i + dict_entries_per_basket]

                    # Derived branches scaled by the channel attributes
                    # (units as suggested by the attribute and branch names).
                    dict_bucket['peak_value_mV'] = dict_bucket['peak_value'] * gain_mV
                    dict_bucket['peak_length_ns'] = dict_bucket['peak_length'] * horiz_interval_ns
                    dict_bucket['peak_start_us'] = dict_bucket['peak_start'] * horiz_interval_ns / 1000
                    dict_bucket['peak_cfd_us'] = dict_bucket['peak_cfd_index'] * horiz_interval_ns / 1000
                    dict_bucket['peak_rise_ns'] = dict_bucket['rise_time'] * horiz_interval_ns
                    dict_bucket['peak_area_ns_mV'] = dict_bucket['peak_area'] * horiz_interval_ns * gain_mV
                    dict_bucket['peak_baseline_mV'] = dict_bucket['peak_baseline'] * gain_mV - offset_mV
                    dict_bucket['peak_noise_mV'] = dict_bucket['peak_noise'] * gain_mV
                    dict_bucket['peak_fwhm_ns'] = dict_bucket['peak_fwhm'] * horiz_interval_ns

                    try:
                        if i == 0:
                            # The first chunk creates the TTree.
                            fout[ttree_name] = dict_bucket
                        else:
                            fout[ttree_name].extend(dict_bucket)
                    except Exception as e:
                        # Broad on purpose: this script exists to surface
                        # whatever the writer raises. Only the chunk loop is
                        # left; remaining peak types and channels are still
                        # attempted, as in the original script.
                        print(f"Error {e} while writing {i} to {i + dict_entries_per_basket}")
                        saving_ok = False
                        break
                    else:
                        basket_size = 0
                        entry_size = 0
                        for value in dict_bucket.values():
                            basket_size += value.nbytes
                            entry_size += value.dtype.itemsize
                        dict_entries = dict_bucket['peak_value_mV'].shape[0]
                        print(f"\textending each of {len(dict_bucket.values())} branch of TTree {ttree_name} with bucket no {i//dict_entries_per_basket:d}")
                        print(f"\tadding {dict_entries} entries (each entry has {entry_size} bytes, in total {basket_size/1024/1024:.2f} MB)")

    # Runs after both files are closed, so the reported size is read and the
    # file removed only once nothing holds it open.
    if not saving_ok:
        print(f"Generated file with {root_file_path.stat().st_size} bytes")
        print(f"Removing {root_file_path}, file corrupted")
        root_file_path.unlink()
if __name__ == "__main__":
    # Run the click command only when the file is executed as a script.
    convert()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Processing channel 0 | |
Processing positive peaks | |
extending each of 23 branch of TTree channel_0/positive with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 6 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 7 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/positive with bucket no 8 | |
adding 507702 entries (each entry has 152 bytes, in total 73.60 MB) | |
Processing negative peaks | |
extending each of 23 branch of TTree channel_0/negative with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_0/negative with bucket no 6 | |
adding 433912 entries (each entry has 152 bytes, in total 62.90 MB) | |
Processing channel 1 | |
Processing positive peaks | |
extending each of 23 branch of TTree channel_1/positive with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 6 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 7 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 8 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/positive with bucket no 9 | |
adding 345506 entries (each entry has 152 bytes, in total 50.08 MB) | |
Processing negative peaks | |
extending each of 23 branch of TTree channel_1/negative with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_1/negative with bucket no 6 | |
adding 699947 entries (each entry has 152 bytes, in total 101.46 MB) | |
Processing channel 2 | |
Processing positive peaks | |
extending each of 23 branch of TTree channel_2/positive with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 6 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 7 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 8 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/positive with bucket no 9 | |
adding 378574 entries (each entry has 152 bytes, in total 54.88 MB) | |
Processing negative peaks | |
extending each of 23 branch of TTree channel_2/negative with bucket no 0 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 1 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 2 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 3 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 4 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 5 | |
adding 1000000 entries (each entry has 152 bytes, in total 144.96 MB) | |
extending each of 23 branch of TTree channel_2/negative with bucket no 6 | |
adding 985117 entries (each entry has 152 bytes, in total 142.80 MB) | |
Processing channel 3 | |
Processing positive peaks | |
Error 'i' format requires -2147483648 <= number <= 2147483647 while writing 0 to 1000000 | |
Processing negative peaks | |
Error 'i' format requires -2147483648 <= number <= 2147483647 while writing 0 to 1000000 | |
Generated file with 3964309449 bytes | |
Removing 4nA.slim.root, file corrupted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ares][plgkongruencj@ac0015 disc1135]$ wget https://s3p.cloud.cyfronet.pl/datarawlv2v4/20231204m2/4nA.slim.hdf | |
--2024-02-22 14:26:25-- https://s3p.cloud.cyfronet.pl/datarawlv2v4/20231204m2/4nA.slim.hdf | |
Resolving s3p.cloud.cyfronet.pl (s3p.cloud.cyfronet.pl)... 149.156.176.202, 149.156.176.201, 149.156.176.200 | |
Connecting to s3p.cloud.cyfronet.pl (s3p.cloud.cyfronet.pl)|149.156.176.202|:443... connected. | |
HTTP request sent, awaiting response... 200 OK | |
Length: 4321335884 (4.0G) [application/x-hdf] | |
Saving to: ‘4nA.slim.hdf’ | |
4nA.slim.hdf 100%[===========================================================================>] 4.02G 319MB/s in 14s | |
2024-02-22 14:26:38 (304 MB/s) - ‘4nA.slim.hdf’ saved [4321335884/4321335884] | |
[ares][plgkongruencj@ac0015 disc1135]$ python -m venv venv | |
[ares][plgkongruencj@ac0015 disc1135]$ source venv/bin/activate | |
(venv) [ares][plgkongruencj@ac0015 disc1135]$ pip install uproot h5py click | |
Collecting uproot | |
Using cached uproot-5.2.2-py3-none-any.whl (346 kB) | |
Collecting h5py | |
Using cached h5py-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB) | |
Collecting click | |
Downloading click-8.1.7-py3-none-any.whl (97 kB) | |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 97.9/97.9 kB 2.3 MB/s eta 0:00:00 | |
Collecting awkward>=2.4.6 | |
Using cached awkward-2.6.1-py3-none-any.whl (749 kB) | |
Collecting fsspec | |
Using cached fsspec-2024.2.0-py3-none-any.whl (170 kB) | |
Collecting numpy | |
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB) | |
Collecting packaging | |
Using cached packaging-23.2-py3-none-any.whl (53 kB) | |
Collecting awkward-cpp==29 | |
Using cached awkward_cpp-29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (706 kB) | |
Collecting importlib-metadata>=4.13.0 | |
Using cached importlib_metadata-7.0.1-py3-none-any.whl (23 kB) | |
Collecting zipp>=0.5 | |
Using cached zipp-3.17.0-py3-none-any.whl (7.4 kB) | |
Installing collected packages: zipp, packaging, numpy, fsspec, click, importlib-metadata, h5py, awkward-cpp, awkward, uproot | |
Successfully installed awkward-2.6.1 awkward-cpp-29 click-8.1.7 fsspec-2024.2.0 h5py-3.10.0 importlib-metadata-7.0.1 numpy-1.26.4 packaging-23.2 uproot-5.2.2 zipp-3.17.0 | |
[notice] A new release of pip available: 22.3.1 -> 24.0 | |
[notice] To update, run: pip install --upgrade pip | |
(venv) [ares][plgkongruencj@ac0015 disc1135]$ python discussion1135.py 4nA.slim.hdf 4nA.slim.root 1>stdout.log 2>stderr.log |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment