Last active
October 31, 2018 17:03
-
-
Save liangwang0734/b8f9d82f050950c90824e524d64e6863 to your computer and use it in GitHub Desktop.
compare_mercury_messenger_vs_gkeyll_B_field.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings | |
warnings.simplefilter(action='ignore', category=FutureWarning) | |
import matplotlib as mpl | |
show_on_screen = True | |
if not show_on_screen: | |
mpl.use("agg") # if not showing on screen, agg is faster | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
import numpy as np | |
import pandas as pd | |
import h5py | |
import scipy.interpolate as interpolate | |
import scipy.ndimage as ndimage | |
from matplotlib.ticker import MaxNLocator | |
import os.path | |
from time import time as now | |
""" | |
Attempts to make things faster or to handle huge data: | |
- Convert observation data to binary format for faster repeated access, if no | |
converted file exists. | |
- Extract model data from local region for faster repeated access, if no | |
extracted file exists. | |
- Only sample points between tstart and tend. | |
- Option read_all_to_mem to read entire data block into memory or do i/o | |
directly on disk. Works good for small files. Does not work well with large | |
dataset as slicing over component and z index can be very slow. | |
- If read_all_to_mem is False, then we can use option extract_step to specify | |
each time how many x indices to step over. We iterate over jx indices, and | |
first read data block of index [jx:jx+extract_step, iy, :, :] into memory ( | |
very slow), and then extract the component and iz range data into files | |
(fast). This works well with large files. Use as large extrct_step as | |
possible. | |
""" | |
# original data files | |
folder = "3D-10mom_3_Xianzhe_InnerBcs_highres_1_3/" | |
folder_obs = "./" | |
fname_obs = folder_obs + 'MAGMSOSCI08280_V08.TAB' | |
fname0 = folder + "mercury_v2_staticField.h5" | |
fname1 = folder + "mercury_v2_q_{}.h5" | |
frames = [6] | |
# observation data file saved in binary format for faster access | |
fname_obs_bin = folder_obs + "MAGMSOSCI08280_V08.npz" | |
# extracted simulation data files; each B component in a separate file | |
fname_B1_fmt = folder + "mercury_v2_compare_B1_{}.h5" | |
fname_B0_fmt = folder + "mercury_v2_compare_B0.h5" | |
tick_label_fmt = mdates.DateFormatter("%H:%M:%S") | |
tstart = pd.to_datetime(pd.datetime(2008, 10, 6, 7, 0, 0)) | |
tend = pd.to_datetime(pd.datetime(2008, 10, 6, 10, 0, 0)) | |
# slicing stride for sampling points along the trajectory | |
# larger stride means fewer sampling points | |
stride = 1000 | |
# mode can be "nearest" or "map_coordinates" | |
mode = "map_coordinates" | |
# output figure | |
figname = 'output_{}.png'.format(mode) | |
# constants | |
R_km = 2439.7 | |
iR_km = 1. / R_km | |
R = R_km * 1e3 | |
iR = 1. / R | |
# observation data i/o options | |
# convert observation date from ascii (TAB) to binary, if not exists | |
try_convert_obs_bin = True | |
# read observation data saved in the binary file, if exists | |
try_read_obs_bin = True | |
# gkeyll data extraction options | |
# extract background B field in local region | |
force_extract_B0 = False | |
# extract B1 in local region | |
force_extract_B1 = False | |
Bx_idx0 = 3 | |
Bx_idx = 23 | |
# default extract options | |
extract_kwargs = dict( | |
xmin=-10 * R, | |
xmax=10 * R, | |
xstep=1, | |
ymin=-8 * R, | |
ymax=10 * R, | |
ystep=1, | |
zmin=-0.5 * R, | |
zmax=0.5 * R, | |
zstep=1, | |
dtype="float32", # use float16 if mem overflows | |
coeff=1e9, # nano-tesla | |
read_all_to_mem=False, # set False if mem overflows | |
extract_step=100, # larger, faster, but might overflow | |
) | |
# gkeyll data sampling options | |
# read entire component data into memory before sampling | |
read_all_to_mem = True | |
if mode == "map_coordinates": | |
# set order = 0 to use nearest point | |
map_kwargs = dict(order=3) | |
# READ OBSERVED B FIELD | |
print("***** Reading observation data...") | |
exists = os.path.exists(fname_obs_bin) | |
if try_read_obs_bin and exists: | |
arr = np.load(fname_obs_bin) | |
x = arr["x"] | |
y = arr["y"] | |
z = arr["z"] | |
Bx = arr["Bx"] | |
By = arr["By"] | |
Bz = arr["Bz"] | |
B = arr["B"] | |
time = pd.to_datetime(arr["time"]) | |
else: | |
arr = np.loadtxt(fname_obs) | |
x = arr[:, 6] * iR_km | |
y = arr[:, 7] * iR_km | |
z = arr[:, 8] * iR_km | |
Bx = arr[:, 9] | |
By = arr[:, 10] | |
Bz = arr[:, 11] | |
B = np.sqrt(Bx**2 + By**2 + Bz**2) | |
year = arr[:, 0].astype(np.int64) | |
day_of_year = arr[:, 1].astype(np.int64) | |
hour = arr[:, 2].astype(np.int64) | |
minute = arr[:, 3].astype(np.int64) | |
second = arr[:, 4] | |
time0_str = '{} {} {} {} {}'.format(year[0], day_of_year[0], hour[0], | |
minute[0], second[0]) | |
time0 = pd.datetime.strptime(time0_str, '%Y %j %H %M %S.%f') | |
dt_in_s = ( | |
(day_of_year - day_of_year[0]) * 24.0 * 3600 + (hour - hour[0]) * 3600 | |
+ (minute - minute[0]) * 60 + second - second[0]) | |
dt_in_s += (time0 - pd.datetime(1970, 1, 1, 0, 0, 0)).total_seconds() | |
time = pd.to_datetime(dt_in_s, unit='s') | |
if try_convert_obs_bin and not exists: | |
print("convert observation data to {}".format(fname_obs_bin)) | |
np.savez(fname_obs_bin, x=x, y=y, z=z, Bx=Bx, By=By, Bz=Bz, B=B, time=time) | |
print("time[0]: {}".format(time[0])) | |
print("time[-1]: {}".format(time[-1])) | |
mask = (time < tend) & (time > tstart) | |
x = x[mask] | |
y = y[mask] | |
z = z[mask] | |
Bx = Bx[mask] | |
By = By[mask] | |
Bz = Bz[mask] | |
B = B[mask] | |
time = time[mask] | |
print("flyby x min {:10g}, max {:10g}".format(x.min(), x.max())) | |
print("flyby y min {:10g}, max {:10g}".format(y.min(), y.max())) | |
print("flyby z min {:10g}, max {:10g}".format(z.min(), z.max())) | |
# EXTRACT REGIONAL DATA | |
print("\n***** Extracting model data...") | |
def adjust_idx(ix_cc, nx): | |
""" | |
Make sure slice indices are within bound. | |
Returns: | |
(ix_cc, ix_nc) | |
""" | |
start, stop, step = ix_cc.indices(nx) | |
ix_cc = slice(start, stop, step) | |
start, stop, step = slice(start, stop + step, step).indices(nx) | |
ix_nc = slice(start, stop, step) | |
return ix_cc, ix_nc | |
def extract3d(filename_in, | |
filename_out, | |
comp, | |
xmin, | |
xmax, | |
xstep, | |
ymin, | |
ymax, | |
ystep, | |
zmin, | |
zmax, | |
zstep, | |
read_all_to_mem=True, | |
extract_step=100, | |
coeff=1, | |
**kwargs): | |
""" | |
Extract one component in a local region. | |
Arguments: | |
read_all_to_mem: True or False. If True, original dataset is loaded into | |
memory for fast extraction; If False, extract from hdf5 dataset on disk | |
directly so that very large files can be handled. | |
extract_step: If read_all_to_mem is False, then we iterate x indices to read | |
all components in many yz planes. This argument sets how many x indices | |
we step over each time, i.e., each time we read data in x index range jx | |
to jx+extract_out. | |
coeff: Multiplication coefficient. | |
kwargs: Arguments passed to h5py.create_dataset. Examples are: | |
dtype: For example, "float32". Note that a lower precision dtype use | |
less space but might truncate tiny/huge values. Then it might be | |
useful to adjust multiplication coeff so that most values are well | |
represented. | |
chunks: ... | |
""" | |
file_in = h5py.File(filename_in, 'r') | |
file_out = h5py.File(filename_out, 'w') | |
# grid | |
sg_in = file_in["StructGrid"] | |
vsKind = sg_in.attrs["vsKind"].decode("UTF-8") | |
if vsKind in ["structured"]: | |
xnc = sg_in[:, 0, 0, 0] | |
ync = sg_in[0, :, 0, 1] | |
znc = sg_in[0, 0, :, 2] | |
elif vsKind in ["uniform"]: | |
lo = np.array(sg_in.attrs["vsLowerBounds"]) | |
up = np.array(sg_in.attrs["vsUpperBounds"]) | |
ncells = np.array(sg_in.attrs["vsNumCells"]) | |
xnc, ync, znc = [ | |
np.linspace(lo[d], up[d], ncells[d]) for d in range(3) | |
] | |
x = 0.5 * (xnc[1:] + xnc[:-1]) | |
y = 0.5 * (ync[1:] + ync[:-1]) | |
z = 0.5 * (znc[1:] + znc[:-1]) | |
# slices | |
ix = slice(np.searchsorted(x, xmin), np.searchsorted(x, xmax), xstep) | |
iy = slice(np.searchsorted(y, ymin), np.searchsorted(y, ymax), ystep) | |
iz = slice(np.searchsorted(z, zmin), np.searchsorted(z, zmax), zstep) | |
nx = x.shape[0] | |
ny = y.shape[0] | |
nz = z.shape[0] | |
ix, ixg = adjust_idx(ix, nx) | |
iy, iyg = adjust_idx(iy, ny) | |
iz, izg = adjust_idx(iz, nz) | |
print("extraction slices: ix {} iy {} iz {}".format(ix, iy, iz)) | |
nx = (ix.stop - ix.start) // ix.step | |
ny = (iy.stop - iy.start) // iy.step | |
nz = (iz.stop - iz.start) // iz.step | |
print("extraction size: nx {} ny {} nz {}".format(nx, ny, nz)) | |
# field | |
sgf_in = file_in["StructGridField"] | |
if read_all_to_mem: | |
kwargs.setdefault("dtype", sgf_in.dtype) | |
data = sgf_in[ix, iy, iz, :] * coeff | |
Bx_out = file_out.create_dataset("Bx", data=data[..., comp], **kwargs) | |
By_out = file_out.create_dataset( | |
"By", data=data[..., comp + 1], **kwargs) | |
Bz_out = file_out.create_dataset( | |
"Bz", data=data[..., comp + 2], **kwargs) | |
else: | |
nx = (ix.stop - ix.start) // ix.step | |
ny = (iy.stop - iy.start) // iy.step | |
nz = (iz.stop - iz.start) // iz.step | |
kwargs.setdefault("dtype", sgf_in.dtype) | |
Bx_out = file_out.create_dataset("Bx", shape=(nx, ny, nz), **kwargs) | |
By_out = file_out.create_dataset("By", shape=(nx, ny, nz), **kwargs) | |
Bz_out = file_out.create_dataset("Bz", shape=(nx, ny, nz), **kwargs) | |
jxs = range(ix.start, ix.stop, ix.step) | |
njx = len(jxs) | |
t00 = now() | |
for jx_out, jx in enumerate(jxs): | |
if jx_out % extract_step == 0 and jx_out + extract_step <= njx: | |
t0 = now() | |
print("jx {}:{}, jx_out {}:{} out of {}".format( | |
jx, jx + extract_step, jx_out, jx_out + extract_step, nx)) | |
tmp = sgf_in[jx:jx + extract_step, iy, :, :] * coeff | |
print(" extract file to mem took {}s".format(now() - t0)) | |
t0 = now() | |
Bx_out[jx_out:jx_out + extract_step, ...] = tmp[..., iz, comp] | |
By_out[jx_out:jx_out + extract_step, ...] = tmp[..., iz, comp + | |
1] | |
Bz_out[jx_out:jx_out + extract_step, ...] = tmp[..., iz, comp + | |
2] | |
print(" extract mem to file took {}s".format(now() - t0)) | |
del(tmp) # FIXME does this really free the memory? | |
elif (jx_out + | |
extract_step) > njx and jx_out % extract_step == 0: | |
print("jx {}:{}, jx_out {}: out of {}".format( | |
jx, jxs[-1] + 1, jx_out, nx)) | |
tmp = sgf_in[jx:jxs[-1] + 1, iy, :, :] * coeff | |
Bx_out[jx_out:, ...] = tmp[..., iz, comp] | |
By_out[jx_out:, ...] = tmp[..., iz, comp + 1] | |
Bz_out[jx_out:, ...] = tmp[..., iz, comp + 2] | |
print("extraction took {}s".format(now() - t00)) | |
for attr in ['vsType', 'vsMesh', 'vsCentering']: | |
for dset in [Bx_out, By_out, Bz_out]: | |
dset.attrs[attr] = sgf_in.attrs[attr] | |
# mesh | |
sg_out = file_out.create_group("StructGrid") | |
sg_out.attrs["vsType"] = np.string_("mesh") | |
sg_out.attrs["vsKind"] = np.string_("rectilinear") | |
sg_out.create_dataset("axis0", data=xnc[ixg]) | |
sg_out.create_dataset("axis1", data=ync[iyg]) | |
sg_out.create_dataset("axis2", data=znc[izg]) | |
# time | |
file_out.create_group('timeData') | |
for attr in ['vsType', 'vsTime', 'vsStep']: | |
file_out['timeData'].attrs[attr] = file_in['timeData'].attrs[attr] | |
file_out.close() | |
file_in.close() | |
fname_out = fname_B0_fmt | |
exists = os.path.exists(fname_out) | |
if force_extract_B0 or not exists: | |
print("\nextracting to {}".format(fname_out)) | |
extract3d(fname0, fname_out, Bx_idx0, **extract_kwargs) | |
else: | |
print("{} exists; skipping".format(fname_out)) | |
for frame in frames: | |
fname_out = fname_B1_fmt.format(frame) | |
exists = os.path.exists(fname_out) | |
if force_extract_B1 or not exists: | |
print("\nextracting to {}".format(fname_out)) | |
extract3d( | |
fname1.format(frame), fname_out, comp=Bx_idx, **extract_kwargs) | |
else: | |
print("{} exists; skipping".format(fname_out)) | |
print("\n***** Sampling model data...") | |
# READ SIMULATION GRID | |
with h5py.File(fname_B0_fmt) as fp: | |
sg = fp["StructGrid"] | |
xnc = sg["axis0"][:] * iR | |
ync = sg["axis1"][:] * iR | |
znc = sg["axis2"][:] * iR | |
xcc = 0.5 * (xnc[1:] + xnc[:-1]) | |
ycc = 0.5 * (ync[1:] + ync[:-1]) | |
zcc = 0.5 * (znc[1:] + znc[:-1]) | |
print("extracted region x min {:10g}, max {:10g}".format(xcc.min(), xcc.max())) | |
print("extracted region y min {:10g}, max {:10g}".format(ycc.min(), ycc.max())) | |
print("extracted region z min {:10g}, max {:10g}".format(zcc.min(), zcc.max())) | |
coords = [xcc, ycc, zcc] | |
coords_t = np.array([x[::stride], y[::stride], z[::stride]]) | |
# READ B FIELD FROM SIMULATION AND MAKE PLOTS | |
def prep_nearest(coords, coords_t, **kwargs): | |
npts = coords_t.shape[1] | |
idx = np.empty([3, npts], dtype=int) | |
for d in range(3): | |
x = coords[d] | |
xt = coords_t[d] | |
nx, = x.shape | |
idx[d, :] = np.clip(np.searchsorted(x, xt, **kwargs), 0, nx - 1) | |
return idx | |
def interp_nearest(data, idx, read_all_to_mem=True, **kwargs): | |
npts = idx.shape[1] | |
pts = np.empty([npts]) | |
if read_all_to_mem: | |
data = data[...] | |
for ipt in range(npts): | |
pts[ipt] = data[tuple(idx[:, ipt])] | |
return np.asarray(pts) | |
def prep_map_coordinates(coords, coords_t, **kwargs): | |
xti = [] | |
kind = kwargs.pop("kind", "linear") | |
for d in range(3): | |
x = coords[d] | |
# xi: image coordinates of field, i.e., 0 to n-1; ints | |
xi = np.arange(len(x)) | |
xt = np.clip(coords_t[d], x.min(), x.max()) | |
# xti: image coordiantes of sampling points; floats | |
xti.append(interpolate.interp1d(x, xi, kind=kind)(xt)) | |
return np.vstack(xti) | |
def interp_map_coordinates(data, xti, read_all_to_mem=True, **kwargs): | |
if read_all_to_mem: | |
data = data[...] | |
return ndimage.map_coordinates(data, xti, **kwargs) | |
if mode == "map_coordinates": | |
xti = prep_map_coordinates(coords, coords_t) | |
interp = interp_map_coordinates | |
kwargs = dict(xti=xti, read_all_to_mem=read_all_to_mem) | |
kwargs.update(map_kwargs) | |
elif mode == "nearest": | |
idx = prep_nearest(coords, coords_t) | |
interp = interp_nearest | |
kwargs = dict(idx=idx, read_all_to_mem=read_all_to_mem) | |
with h5py.File(fname_B0_fmt) as fp: | |
Bx0 = interp(fp['Bx'], **kwargs) | |
By0 = interp(fp['By'], **kwargs) | |
Bz0 = interp(fp['Bz'], **kwargs) | |
Bmag0 = np.sqrt(Bx0**2 + By0**2 + Bz0**2) | |
# TODO update figure data instead of redraw everything | |
for frame in frames: | |
with h5py.File(fname_B1_fmt.format(frame)) as fp: | |
Bx1 = interp(fp['Bx'], **kwargs) | |
By1 = interp(fp['By'], **kwargs) | |
Bz1 = interp(fp['Bz'], **kwargs) | |
Bxt = Bx0 + Bx1 | |
Byt = By0 + By1 | |
Bzt = Bz0 + Bz1 | |
Bmagt = np.sqrt(Bxt**2 + Byt**2 + Bzt**2) | |
print("\n***** Making plot...") | |
fig, axs = plt.subplots(4, 1, sharex=True, figsize=(10, 10)) | |
for i, ax in enumerate(axs): | |
ax.plot( | |
time, [Bx, By, Bz, B][i], | |
c='k', | |
lw=2, | |
ls='-', | |
alpha=0.75, | |
label=['$B_x$', '$B_y$', '$B_z$', '$|B|$'][i] + ' - MAG') | |
if i == 3: | |
ax.plot( | |
time[::stride], [Bx0, By0, Bz0, Bmag0][i], | |
c='b', | |
lw=2, | |
ls='--', | |
alpha=0.75, | |
label=['$B_{x0}$', '$B_{y0}$', '$B_{z0}$', '$|B_0|$'][i]) | |
ax.plot( | |
time[::stride], [Bxt, Byt, Bzt, Bmagt][i], | |
c='r', | |
lw=2, | |
ls='-', | |
alpha=0.75, | |
label=['$B_x$', '$B_y$', '$B_z$', '$|B|$'][i] + ' - Gkeyll') | |
ax.xaxis.set_major_formatter(tick_label_fmt) | |
ax.legend() | |
# ax.axhline(0, color='k', linestyle='dashed') # mark horizontal axis | |
ax.grid() | |
ax.yaxis.set_major_locator(MaxNLocator(prune='both')) | |
ax.set_xlim(tstart, tend) | |
axs[-1].set_xlabel('Time (UT) in October 6 , 2008') | |
fig.subplots_adjust(hspace=0) | |
plt.savefig(figname, bbox_inches='tight', dpi=320) | |
if show_on_screen: | |
plt.show() | |
else: | |
plt.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment