Skip to content

Instantly share code, notes, and snippets.

@moble
Last active September 28, 2022 15:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moble/2ef958f37c9a05ebecfc4032b3f4a362 to your computer and use it in GitHub Desktop.
"""Convert RPXMB files to RPDMB
This file can be run either as a script, or by calling its functions. To run as a script, do
something like this:
python X2D.py Strain_N2.h5 Strain_N3.h5 Strain_N4.h5 ExtraWaveforms.h5
To call the functions, and assuming that this file is in the same directory as the script using it,
do something like this:
import X2D
X2D.x2d("Strain_N2.h5")
Note that the input filenames should be H5 files, but the corresponding JSON files are also used and
updated.
In every case, the output file uses the original file stem appended with "_RPDMB". For example, the
function call above will create "Strain_N2_RPDMB.h5" and "Strain_N2_RPDMB.json". You are responsible
for moving those files into their final locations.
For each input file, this script does the following:
1. Checks if there is any RPXMB data in the input file; if not it does nothing
2. For each group and dataset that is not in RPXMB format, simply copies it (and its attributes)
to the new file, ensuring that shuffle and gzip (level 9) compression are enabled.
3. For each RPXMB-formatted dataset in the input file:
a. Decompress the original RPXMB data
b. Removes any modes with ell<2
c. Copies over the "modifications" entry in the JSON
d. Copies over scri- and spec-related version info
e. Saves the data to RPDMB format
f. Re-loads the RPDMB-formatted file
g. Checks to make sure various pieces of data have carried over correctly:
- `t`, `data`, and `frame` are bitwise identical
- H5 group has same datasets
- H5 group's critical attributes are set correctly:
* ell_max
* ell_min
* n_times
* sxs_format
- JSON file has identical data in "data_info", "modifications", and "validation/n_times"
h. If any of the checks in the last step failed, print a warning with details
"""
import sys
import warnings
import pathlib
import json
import h5py
import numpy as np
import sxs
# "version_info" keys copied verbatim from the input JSON into the output,
# because the save routine may otherwise regenerate them incorrectly.
copied_version_info_items = ["quaternion", "spherical_functions", "spinsfast", "scri", "spec_version_hist"]
# Keyword arguments applied to every non-RPXMB dataset copied into the output H5 file.
compression_options = {
    "compression": "gzip",
    "compression_opts": 9,
    "shuffle": True,
}
# Prefix used to indent progress messages printed by the functions below.
indent = " "
def sxs_format(obj):
    """Return the "sxs_format" attribute of `obj`'s parent, or "" if unavailable.

    Missing `parent` or `attrs` attributes fall back to empty dicts, so this
    never raises for plain objects; it simply returns the empty string.
    """
    parent = getattr(obj, "parent", {})
    attributes = getattr(parent, "attrs", {})
    return attributes.get("sxs_format", "")
def found_rpxmb(path):
    """Return True if the H5 file at `path` contains any RPXMB-formatted data.

    A dataset counts as RPXMB when its name ends with "data" and its parent
    group's "sxs_format" attribute is "rotating_paired_xor_multishuffle_bzip2".
    """
    # Use a mutable container so the visitor closure can record its result.
    found = {"rpxmb": False}

    def visitor(name, obj):
        if name.endswith("data") and sxs_format(obj) == "rotating_paired_xor_multishuffle_bzip2":
            found["rpxmb"] = True
            # Returning any non-None value makes h5py's visititems stop early.
            return True

    with h5py.File(path, "r") as h5_file:
        h5_file.visititems(visitor)
    return found["rpxmb"]
def rpxmb2rpdmb(input_h5_path, input_json_path, group, output_h5_path, output_json_path, remove_ell_0_and_1=True):
    """Convert one RPXMB-formatted H5 group to RPDMB format and verify the result.

    Parameters
    ----------
    input_h5_path, input_json_path : pathlib.Path
        Existing RPXMB-formatted H5 file and its accompanying JSON file.
    group : str
        Name of the H5 group holding the RPXMB data ("/" for root-level data).
    output_h5_path, output_json_path : pathlib.Path
        Destination H5/JSON files; the H5 file is opened in append mode so
        multiple groups can be written into the same output file.
    remove_ell_0_and_1 : bool, optional
        If True (the default), drop all modes with ell < 2 before saving.

    Raises
    ------
    RuntimeError
        If any round-trip consistency check between the input and output
        fails; the accumulated differences are printed before raising.
    """
    print(f"{indent}Converting {input_h5_path}{group}", end =" ")
    sys.stdout.flush()
    # Load the raw RPXMB data without the usual frame transformation or
    # conjugate-pair conversion, so the stored representation round-trips.
    w_in = sxs.rpxmb.load(
        f"{input_h5_path}{group}",
        transform_to_inertial=False,
        convert_from_conjugate_pairs=False
    )
    with input_json_path.open("r") as f:
        input_json = json.load(f)
    # Preserve this, as it will get lost when we remove ell modes
    log_frame = w_in.log_frame
    # Copy over any metadata that need to go into the JSON file correctly
    w_in._metadata["modifications"] = w_in.json_data["modifications"]
    # Copy over several "version_info" keys that may be changed incorrectly
    version_info_update = dict()
    if group == "/":
        input_version_info = input_json["version_info"]
    else:
        input_version_info = input_json[group]["version_info"]
    for copied_version_info_item in copied_version_info_items:
        if copied_version_info_item in input_version_info:
            version_info_update[copied_version_info_item] = input_version_info[copied_version_info_item]
    # NOTE(review): the lookups below assume "spec_version_hist" was present in
    # the input version_info; a missing key would raise KeyError here — confirm
    # all inputs carry that key.
    if group != "/" and version_info_update["spec_version_hist"] == []:
        # First, try with the corresponding `h` group's data
        alternate_group = group.replace("Psi4", "h")
        version_info_update["spec_version_hist"] = input_json.get(
            alternate_group, {}).get("version_info", {}).get("spec_version_hist", [])
    if group != "/" and version_info_update["spec_version_hist"] == []:
        # If the above failed, just use the first `h` finite-radius data
        alternate_group = sorted([
            k for k in input_json
            if k.startswith("/rh_FiniteRadii")
            and input_json[k].get("version_info", {}).get("spec_version_hist", []) != []
        ])[0]
        version_info_update["spec_version_hist"] = input_json.get(
            alternate_group, {}).get("version_info", {}).get("spec_version_hist", [])
    if remove_ell_0_and_1:
        # Slice off every mode before (ell=2, m=-2), then record the new ell_min
        w_in = w_in[:, w_in.index(2, -2):]
        w_in._metadata["ell_min"] = 2
    # Write the RPDMB output; append mode lets several groups share one file,
    # and zero tolerance disables any lossy truncation of the data.
    sxs.rpdmb.save(
        w_in,
        f"{output_h5_path}{group}",
        file_write_mode="a",
        allow_existing_group=True,
        log_frame=log_frame,
        verbose=False,
        convert_to_conjugate_pairs=False,
        L2norm_fractional_tolerance=0.0,
        version_info_update=version_info_update,
    )
    # Undo the conjugate-pair storage layout so w_in compares directly with
    # the re-loaded output below.
    w_in.convert_from_conjugate_pairs()
    w_out = sxs.rpdmb.load(f"{output_h5_path}{group}", transform_to_inertial=False)
    differences = [] # Will add to this if any are found
    # Check data as loaded by `sxs`
    if not np.array_equal(w_in.t, w_out.t):
        differences.append("non-identical time values")
    if not np.array_equal(w_in.data, w_out.data):
        differences.append("non-identical data values")
    if not np.array_equal(w_in.frame, w_out.frame):
        differences.append("non-identical frame values")
    # Check structure of h5 group
    with h5py.File(input_h5_path, "r") as f_in, h5py.File(output_h5_path, "r") as f_out:
        g_in = f_in[group]
        g_out = f_out[group]
        if list(g_in) != list(g_out):
            differences.append(f"list(g_in) != list(g_out)")
        if list(g_in.attrs) != list(g_out.attrs):
            differences.append(f"list(g_in.attrs) != list(g_out.attrs)")
        if g_in.attrs["ell_max"] != g_out.attrs["ell_max"]:
            differences.append(f"""g_in.attrs["ell_max"] != g_out.attrs["ell_max"]""")
        if remove_ell_0_and_1:
            # Output must advertise the truncated mode range, not the input's
            if g_out.attrs["ell_min"] != 2:
                differences.append(f"""g_out.attrs["ell_min"] != 2""")
        else:
            if g_in.attrs["ell_min"] != g_out.attrs["ell_min"]:
                differences.append(f"""g_in.attrs["ell_min"] != g_out.attrs["ell_min"]""")
        if g_in.attrs["n_times"] != g_out.attrs["n_times"]:
            differences.append(f"""g_in.attrs["n_times"] != g_out.attrs["n_times"]""")
        if g_out.attrs["sxs_format"] != "rotating_paired_diff_multishuffle_bzip2":
            differences.append(f"""g_out.attrs["sxs_format"] != "rotating_paired_diff_multishuffle_bzip2" """)
    # Check JSON data
    with output_json_path.open("r") as f_out:
        output_json = json.load(f_out)
    if group == "/":
        g_in = input_json
        g_out = output_json
    else:
        g_in = input_json[group]
        g_out = output_json[group]
    if remove_ell_0_and_1:
        g_in["data_info"]["ell_min"] = 2 # Fake this number for easier comparisons
    if g_out["sxs_format"] != "rotating_paired_diff_multishuffle_bzip2":
        differences.append(f"""JSON file's "sxs_format" != "rotating_paired_diff_multishuffle_bzip2" """)
    # if g_in["version_info"]["spec_version_hist"] != g_out["version_info"]["spec_version_hist"]:
    #     differences.append(f"""JSON files' "version_info/spec_version_hist" entries don't match""")
    if g_in["validation"]["n_times"] != g_out["validation"]["n_times"]:
        differences.append(f"""JSON files' "validation/n_times" entries don't match""")
    for key in ["data_info", "modifications"]:
        if g_in[key] != g_out[key]:
            differences.append(f"""JSON files' "{key}" entries don't match""")
    if differences:
        # Report every accumulated discrepancy prominently, then abort so the
        # caller does not silently publish a bad conversion.
        differences = "\n - ".join(differences)
        print("\n" + "!"*120)
        print("WARNING: non-identical data values in")
        print(f" {input_h5_path}{group}")
        print(f" {output_h5_path}{group}")
        print(f"and/or corresponding JSON files")
        print("-"*120)
        print(f" - {differences}")
        print("!"*120)
        raise RuntimeError()
    else:
        print("✓")
def x2d(filename):
    """Convert all RPXMB data in `filename` to a new "_RPDMB"-suffixed file.

    Non-RPXMB groups and datasets are copied across with shuffle + gzip(9)
    compression; RPXMB groups are deferred and converted via `rpxmb2rpdmb`.
    Files without any RPXMB data are skipped with a warning.

    Parameters
    ----------
    filename : str or os.PathLike
        Path to the input H5 file; the corresponding ".json" file must also
        exist alongside it.
    """
    input_h5_path = pathlib.Path(filename).expanduser().resolve()
    # Path.with_stem requires Python >= 3.9
    output_h5_path = input_h5_path.with_stem(input_h5_path.stem + "_RPDMB")
    input_json_path = input_h5_path.with_suffix(".json")
    output_json_path = output_h5_path.with_suffix(".json")
    if not found_rpxmb(input_h5_path):
        warning = f"\nNo RPXMB data found in {input_h5_path}; skipping this file."
        warnings.warn(warning)
    else:
        data_to_extrapolate = []
        print(f"""Converting "{input_h5_path}" """)
        with h5py.File(input_h5_path, "r") as f1, h5py.File(output_h5_path, "w") as f2:
            def func(name, obj):
                # Visitor for h5py visititems: defer RPXMB groups for later
                # conversion, copy everything else immediately.
                if name.endswith("data") and sxs_format(obj) == "rotating_paired_xor_multishuffle_bzip2":
                    parent_name = obj.parent.name
                    # NOTE(review): RPXMB data whose parent group is "/" is
                    # neither deferred nor copied here — confirm root-level
                    # RPXMB files are handled as intended.
                    if parent_name != "/":
                        print(f"{indent}Deferring {name}")
                        data_to_extrapolate.append(parent_name)
                elif name != "Time.dat" and name not in f2:
                    if isinstance(obj, h5py.Dataset) and obj.size > 1:
                        # Re-create datasets so compression_options take effect
                        print(f"{indent}Compressing {name}")
                        f2.create_dataset(name, data=obj, **compression_options)
                    else:
                        # Groups and tiny datasets are copied shallowly as-is
                        print(f"{indent}Copying {name}")
                        sys.stdout.flush()
                        sys.stderr.flush()
                        f1.copy(name, f2, name=name, shallow=True)
            f1.visititems(func)
        # Convert the deferred RPXMB groups only after both files are closed,
        # since rpxmb2rpdmb reopens them itself.
        for group in data_to_extrapolate:
            rpxmb2rpdmb(input_h5_path, input_json_path, group, output_h5_path, output_json_path)
        input_size = input_h5_path.stat().st_size
        output_size = output_h5_path.stat().st_size
        print(f"{indent}Input H5 file size: {input_size:_} B")
        print(f"{indent}Output H5 file size: {output_size:_} B")
        print(f"{indent}X2D compression ratio: {input_size/output_size:.4f}")
if __name__ == "__main__":
    # Treat every command-line argument as an input H5 file to convert.
    for input_filename in sys.argv[1:]:
        x2d(input_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment