Skip to content

Instantly share code, notes, and snippets.

@syrte
Last active February 2, 2022 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syrte/028c5d7974c69db40a3208da47aba67a to your computer and use it in GitHub Desktop.
Save syrte/028c5d7974c69db40a3208da47aba67a to your computer and use it in GitHub Desktop.
"""
Python IO for nemo snapshot
Author: syrte (lizz.astro@gmail.com)
Example:
file = '../data/N1e6_d0.22_g0.16_0.00_f0.00.snap'
snap = open_nemo(file, file_meta=False)
print(snap.History.data, end='\n\n')
print(snap['SnapShot/0/Particles'], end='\n\n')
print(snap['SnapShot/0/Parameters/Time'].data)
print(snap.SnapShot[0].Parameters.Time.data)
print(snap.SnapShot[-1].Parameters.Time.data)
t = snap['SnapShot/0/Parameters/Time'].data
x = snap['SnapShot/0/Particles/Position'].data
v = snap['SnapShot/0/Particles/Velocity'].data
m = snap['SnapShot/0/Particles/Mass'].data
# ------------
import unsio.input as uns_in
fp_uns = uns_in.CUNS_IN(file, float32=True)
fp_uns.nextFrame("mxv")
t2 = fp_uns.getData('time')[1]
x2 = fp_uns.getData("all", "pos")[1].reshape(-1, 3)
v2 = fp_uns.getData("all", "vel")[1].reshape(-1, 3)
m2 = fp_uns.getData('all', 'mass')[1]
assert (x==x2).all()
assert (v==v2).all()
History:
02 Feb 2022:
Improve the speed and IO
Add meta file
25 Aug 2021:
Initial version
"""
# reference
# https://github.com/teuben/nemo/blob/master/inc/filestruct.h
# https://github.com/teuben/nemo/blob/master/src/kernel/io/filesecret.h
# https://github.com/teuben/nemo/blob/master/src/kernel/io/filesecret.c
"""
data model example
{
"_type": "i8",
"_shape": [2, 5],
"_data": null,
"_data_offset": 0,
"_item_offset": 0
}
"""
__all__ = ['open_nemo']
import numpy as np
import struct
import json
from pathlib import Path
# 16-bit magic numbers found at the start of every item header.
SingMagic = (0o11 << 8) | 0o222  # item without a dimension list
PlurMagic = (0o13 << 8) | 0o222  # item followed by a dimension list

# Terminator byte of an "xstr" sequence.
xstrNULL = b'\0'

# One-character type code stored in the file -> numpy dtype string
# (or the pseudo types 'set' / 'tes' that open / close a compound item).
TypeDict = {
    'a': 'i1',   # AnyType    - anything at all
    'c': 'u1',   # CharType   - printable chars [use u1 as proxy]
    'b': 'S1',   # ByteType   - unprintable chars
    's': 'i2',   # ShortType  - short integers
    'i': 'i4',   # IntType    - standard integers
    'l': 'i8',   # LongType   - long integers
    'h': 'f2',   # HalfpType  - half precision floating
    'f': 'f4',   # FloatType  - short floating
    'd': 'f8',   # DoubleType - long floating
    '(': 'set',  # SetType    - begin compound item
    ')': 'tes',  # TesType    - end of compound item
}

MaxTagLen = 65  # max tag length, limited for simplicity
MaxVecDim = 9   # max num of vec dim, limited for safety
MaxSetLen = 65  # max num of components in compound item
def getxstr(file, dtype=None):
    """
    Read one zero-terminated sequence ("xstr") from a binary file.

    file: binary file object
        Read from the current position; on return the position is just
        past the terminator.
    dtype: None or numpy dtype specifier
        None reads single bytes and returns the decoded string.
        Otherwise elements of `dtype.itemsize` bytes are read until an
        all-zero element is met and the elements are returned as a list.

    Raises ValueError if the file ends before the terminator is found.
    """
    if dtype is None:
        nbyte = 1
    else:
        dtype = np.dtype(dtype)
        nbyte = dtype.itemsize

    end = xstrNULL * nbyte
    buf = []
    while True:
        s = file.read(nbyte)
        if s == end:
            break
        if len(s) != nbyte:
            # read() gives a short (or empty) result at end of file, which
            # never equals the terminator; bail out instead of looping
            # forever on a truncated file.
            raise ValueError('Unexpected file end')
        buf.append(s)
    buf = b''.join(buf)

    if dtype is None:
        return buf.decode()
    else:
        return np.frombuffer(buf, dtype).tolist()
def gettype(file):
    """
    Read a two-byte type field and return its first byte as a string.

    For a one-character string followed by its zero terminator this gives
    the same result as getxstr(file).
    """
    raw = file.read(2)
    code = raw[:1]
    return code.decode()
def get_item(file, load=False):
    """
    Read the next item header (and optionally its data) from the file.

    file: binary file object
        Positioned at the start of an item.
    load: bool
        If True, array data are read into memory. If False, array data
        are skipped and only their location is recorded. Items without
        a dimension list are always read.

    Returns (tag, item):
        tag is the item name (None for the end-of-set marker) and item is
        a dict with '_type' plus, depending on the kind of item,
        '_item_offset', '_shape', '_data_offset' and '_data'.

    Raises EOFError when the file ends exactly at an item boundary and
    ValueError when the header is truncated or malformed.
    """
    item_offset = file.tell()
    buff = file.read(4)
    if len(buff) == 0:
        raise EOFError('File end')
    elif len(buff) != 4:
        raise ValueError('Unexpected file end')
    elif buff[3:4] != xstrNULL:
        raise ValueError('Unexpected file structure')

    magic = struct.unpack('h', buff[0:2])[0]  # int16
    if magic != SingMagic and magic != PlurMagic:
        raise ValueError(f"Bad magic number: {magic}")

    # `kind` rather than `type`, so the builtin is not shadowed
    kind = TypeDict[buff[2:3].decode()]

    if kind == 'set':
        tag = getxstr(file)
        return tag, dict(_type=kind, _item_offset=item_offset)
    elif kind == 'tes':
        return None, dict(_type=kind)  # end of group

    tag = getxstr(file)
    if magic == SingMagic:
        shape = []
        size = 1
    else:
        shape = getxstr(file, 'i4')
        # Keep the element count a plain Python int computed in 64 bits:
        # np.prod would return float 1.0 for an empty dims list and uses
        # the platform default integer, which can wrap for large arrays.
        size = int(np.prod(shape, dtype=np.int64))

    data_offset = file.tell()
    dtype = np.dtype(kind)
    if load or len(shape) == 0:
        data = np.fromfile(file, dtype, size).reshape(shape)
        if kind == 'u1':
            # char data: drop zero padding and expose as str
            data = data.tobytes().strip(b'\x00').decode()
        elif len(shape) == 0:
            data = data.item()  # scalar
    else:
        # skip over the payload; it can be read later from _data_offset
        file.seek(size * dtype.itemsize, 1)
        data = None

    return tag, dict(_type=kind, _shape=shape,
                     _item_offset=item_offset,
                     _data_offset=data_offset,
                     _data=data)
def add_item(item, subtag, subitem):
    """
    Store subitem in item under subtag, collecting repeated tags.

    The first occurrence of a tag is stored directly. From the second
    occurrence on, the entry becomes a '_type': 'list' dict whose
    members are keyed '0', '1', ... and whose '_shape' holds the count.
    """
    if subtag not in item:
        item[subtag] = subitem
        return

    current = item[subtag]
    if current['_type'] != 'list':
        # promote the single entry to a list, keeping it as member '0'
        current = {'_type': 'list',
                   '_shape': [1],
                   '_item_offset': current['_item_offset'],
                   '0': current}
        item[subtag] = current

    count = current['_shape'][0]
    current[str(count)] = subitem
    current['_shape'][0] = count + 1
def read_item(file, load=False):
    """
    Read one item; for a compound item also read all of its members.

    Members are read recursively and attached with add_item until the
    end-of-set marker is met. Returns (tag, item) as get_item does.
    """
    tag, item = get_item(file, load=load)
    if item['_type'] != 'set':
        return tag, item

    subtag, subitem = read_item(file, load=load)
    while subitem['_type'] != 'tes':
        add_item(item, subtag, subitem)
        subtag, subitem = read_item(file, load=load)
    return tag, item
def read_file(file, load=False):
    """
    Read every top-level item of a snapshot file into a nested dict.

    file: path or binary file object
        Anything without a `tell` attribute is treated as a path and
        opened in binary mode; such a handle is closed before returning.
        A file object passed in by the caller is left open.
    load: bool
        Passed on to read_item.

    Returns a dict of '_type' 'set' holding the top-level items.
    """
    opened_here = file is not None and not hasattr(file, 'tell')
    if opened_here:
        file = open(file, 'rb')

    item = dict(_type='set', _item_offset=0)
    try:
        while True:
            try:
                subtag, subitem = read_item(file, load=load)
            except EOFError:
                break  # clean end of file at an item boundary
            add_item(item, subtag, subitem)
    finally:
        # only close what was opened here
        if opened_here:
            file.close()
    return item
class ItemView:
    """
    Attribute / key style access to a nested item dict.

    item = read_file(file)
    snap = ItemView(item, file=file)

    Every dict-valued entry becomes a nested ItemView attribute, other
    entries are copied as plain attributes. For items that are neither
    'set' nor 'list', `.data` returns the stored value, reading it from
    the file at `_data_offset` when it is not in memory yet.
    """

    def __init__(self, item, file=None, cache=False):
        # anything that is not file-like is treated as a path
        if file is not None and not hasattr(file, 'tell'):
            file = open(file, 'rb')

        for name, value in item.items():
            if isinstance(value, dict):
                value = ItemView(value, file=file, cache=cache)
            setattr(self, name, value)

        self._file = file
        self._cache = cache

    def __repr__(self):
        if self._type not in ['set', 'list']:
            if len(self._shape) == 0:
                return f"{self._type}: {self._data}"
            return f"{self._type}{self._shape}"

        # compound item: one line per member, private attributes skipped
        lines = []
        for name, child in self.__dict__.items():
            if name.startswith('_'):
                continue
            if not hasattr(child, '_shape'):
                line = f"{child._type} {name}"
            elif len(child._shape) == 0:
                line = f"{child._type} {name}: {child._data}"
            else:
                line = f"{child._type} {name}{child._shape}"
            lines.append(line)
        return "\n".join(lines)

    def __getitem__(self, key):
        if isinstance(key, str):
            if hasattr(self, key):
                return getattr(self, key)
            if '/' in key:
                # path-like key: walk down one component at a time
                node = self
                for part in key.strip('/').split('/'):
                    node = node[part]
                return node
            raise KeyError(key)

        if self._type == 'list' and isinstance(key, (int, np.integer)):
            # negative indices count from the end of the list
            index = key if key >= 0 else key + self._shape[0]
            name = str(index)
            if hasattr(self, name):
                return getattr(self, name)
        raise KeyError(key)

    def __getattr__(self, key):
        # only called for attributes not found the normal way
        if key != 'data' or self._type in ['set', 'list']:
            raise AttributeError(f"'itemview' object has no attribute '{key}'")
        return self._load_data()

    def _load_data(self):
        if self._data is not None:
            return self._data

        stream = self._file
        kind = self._type
        dims = self._shape
        start = self._data_offset
        count = np.prod(dims) if dims else 1

        stream.seek(start)
        values = np.fromfile(stream, kind, count).reshape(dims)
        if kind == 'u1':
            values = values.tobytes().strip(b'\x00').decode()
        if self._cache:
            self._data = values
        return values
def open_nemo(file_snap, file_meta=False, rebuild_meta=False, cache=False):
    """
    Open a nemo snapshot file and return an ItemView of its contents.

    file_snap: str
        Path of snapshot file.
    file_meta: bool, str
        False to disable the meta file.
        True to use the default path `<snap dir>/meta/<snap name>.meta`,
        or a string giving the path of the meta file.
        The meta file is a JSON dump of the item structure; it is created
        when missing and read instead of scanning the snapshot otherwise.
    rebuild_meta: bool
        Check if the meta file is consistent with the snap file,
        update the meta file when necessary.
    cache: bool
        Whether data read on demand are kept in the ItemView object.

    Raises FileNotFoundError if file_snap is not an existing file and
    ValueError if the meta path is the snapshot itself.
    """
    file = Path(file_snap).expanduser().resolve()
    # explicit raise: an assert would vanish under `python -O`
    if not file.is_file():
        raise FileNotFoundError(f"Snapshot file not found: {file}")

    if not file_meta:
        item = read_file(file, load=False)
        return ItemView(item, file=file, cache=cache)

    try:
        file_meta = Path(file_meta)
    except TypeError:
        # file_meta=True (or another non-path value): use the default path
        file_meta = file.parent / 'meta' / (file.name + '.meta')
    file_meta = file_meta.expanduser().resolve()
    if file == file_meta:
        raise ValueError("file_meta must differ from file_snap")

    if file_meta.is_file():
        with open(file_meta, 'r') as fp:
            item = json.load(fp)
        if rebuild_meta:
            item_true = read_file(file, load=False)
            # compare after a JSON round trip so both sides use JSON types
            if item != json.loads(json.dumps(item_true)):
                item = item_true
                # serialize first so a failure cannot truncate the old file
                text = json.dumps(item, indent=2)
                with open(file_meta, 'w') as fp:
                    fp.write(text)
    else:
        item = read_file(file, load=False)
        text = json.dumps(item, indent=2)
        file_meta.parent.mkdir(parents=True, exist_ok=True)
        with open(file_meta, 'w') as fp:
            fp.write(text)

    return ItemView(item, file=file, cache=cache)
@syrte
Copy link
Author

syrte commented Aug 25, 2021

snap['SnapShot'][10]['Parameters']['Time']

@syrte
Copy link
Author

syrte commented Feb 2, 2022

Generate meta files for all *.snap files in a given directory.

from multiprocessing import Pool
from pathlib import Path

def func(file):
    """Open one snapshot with file_meta=True so its meta file gets written."""
    print(file, flush=True)
    open_nemo(file, file_meta=True)


# The guard keeps worker processes from re-running the pool start-up when
# they import this module (needed with the spawn start method).
if __name__ == '__main__':
    files = list(Path('data').glob('*.snap'))

    with Pool(10) as pool:
        pool.map_async(func, files).get(1e6)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment