Skip to content

Instantly share code, notes, and snippets.

@rkube
Created November 7, 2022 09:32
Show Gist options
  • Save rkube/a30738d6c4fc7daa6b0e0c5231391d59 to your computer and use it in GitHub Desktop.
Save rkube/a30738d6c4fc7daa6b0e0c5231391d59 to your computer and use it in GitHub Desktop.
postprocessing of downloaded data
import argparse
import pickle
import yaml
import logging
import h5py
from os.path import join
import numpy as np
from tqdm import tqdm
import scipy.sparse as sp_sparse
import cupy as cp
import cupyx.scipy.sparse as cp_sparse
def rcn_infer_cpu(w_in,w_res,w_bi,w_out,leak,r_prev,u):
# applying input weights to the input.
# Sparse and dense matrix multiplication is different in Python
if sp_sparse.issparse(w_in):
a1 = w_in * u
else:
a1=np.dot(w_in, u)
a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
r_now = np.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
y = np.dot(np.append([1],r_now),w_out) # applying the output weight
return r_now,y
def rcn_infer_gpu(w_in, w_res, w_bi, w_out, leak, r_prev, u):
# applying input weights to the input.
# Sparse and dense matrix multiplication is different in Python
if cp_sparse.issparse(w_in):
a1 = w_in * u
else:
a1 = cp.dot(w_in, u)
a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
r_now = cp.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
y = cp.dot(cp.append([1], r_now), w_out) # applying the output weight
return r_now, y
def generate_AE_labels(rcn_datafile, shotlist, subsample, datadir):
"""Generete Alfven Eignemmode labels
This method performs Alfven Eigenmode detection using ECE data
using the RCN algorithm described in:
(See Jalalvand 2022: https://doi.org/10.1088/1741-4326/ac3be7)
For each shot, the ECE data is assumed to be stored in an HDF5 file:
AE_RT_dataset1/xxxxxx.h5, where
* DS_PATH is the path to the dataset
* xxxxxx.h5 is the HDF5 file of the shot name
The fast ECE data are assumed to be in datasets, one for each channel:
xxxxxx.h5
|
|---- /tece01/zdata
| ...
|---- /tece40/zdata
The output of the RCN algorithm is stored in the same h5 file as
xxxxxx.h5
|---- /AE_modes---/xdata
|--/zdata
Where xdata is the time-base and zdata a link to the timebase of tece01.
Args:
- rcn_datafile (str): Location of the weights to load.
- shotlist (list[int]): List of shots to process
- subsample (int): Subsample only every n-th frame
- datadir (str): Location of the xxxxxx.h5 files
"""
print("Generating AE labels")
with open(rcn_datafile, 'rb') as df:
infer_data = pickle.load(df)
n_res_l1 = infer_data['layer1']['w_in'].shape[0]
n_res_l2 = infer_data['layer2']['w_in'].shape[0]
for shotnr in shotlist[:1]:
with h5py.File(join(datadir, f"{shotnr:6d}.h5"), "a") as fp:
tb = fp["tecef01"]["xdata"][:]
good_idx = (tb > 0.0) & (tb < 5000.0)
ece_data = np.vstack([fp[f"tecef{i:02d}"]["zdata"][:] for i in range(1, 41)]).T
tb = tb[good_idx][::subsample]
ece_data = cp.array(ece_data[good_idx, :][::subsample])
print(tb.shape, ece_data.shape, ece_data.dtype)
ae_probs_gpu = cp.zeros([ece_data.shape[0], 5], dtype=np.float32)
ece_data = cp.array((ece_data - infer_data["mean"]) / infer_data["std"])
# Initialize to zero, overwrite in for loop
r_prev = {"layer1": cp.zeros(n_res_l1),
"layer2": cp.zeros(n_res_l2)}
infer_data["layer1"]["w_in"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_in"])
infer_data["layer1"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_res"])
infer_data["layer1"]["w_bi"] = cp.array(infer_data["layer1"]["w_bi"])
infer_data["layer1"]["w_out"] = cp.array(infer_data["layer1"]["w_out"])
infer_data["layer2"]["w_in"] = cp.array(infer_data["layer2"]["w_in"])
infer_data["layer2"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer2"]["w_res"])
infer_data["layer2"]["w_bi"] = cp.array(infer_data["layer2"]["w_bi"])
infer_data["layer2"]["w_out"] = cp.array(infer_data["layer2"]["w_out"])
# Iterate over time index 0
for idx, u in tqdm(enumerate(ece_data)):
L = "layer1"
r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
infer_data[L]['w_bi'], infer_data[L]['w_out'],
infer_data[L]['leak_rate'], r_prev[L], u.T)
L = "layer2"
r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
infer_data[L]['w_bi'], infer_data[L]['w_out'],
infer_data[L]['leak_rate'], r_prev[L], y)
ae_probs_gpu[idx, :] = y[:]
ae_probs = ae_probs_gpu.get()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="postprocess.py",
description="Post-processes D3D datasets")
parser.add_argument("--dataset_def", type=str,
help="YAML file that contains definition of the dataset",
default="AE_datasets/AE_RT_dataset1.yaml")
parser.add_argument("--datadir", type=str,
help="Location of the xxxxxx.h5 files",
default="AE_RT_dataset1")
parser.add_argument("--signal_defs_0d", type=str,
default="d3d_signals/signals_0d.yaml",
help="YAML file that contains 0d signal informations")
parser.add_argument("--signal_defs_1d", type=str,
default="d3d_signals/signals_1d.yaml",
help="YAML file that contains 1d signal informations")
parser.add_argument("--rcn_datafile", type=str,
default="/projects/EKOLEMEN/AE_datasets/1dsignal-model-AE-ECE-RCN.pkl",
help="Location of RCN data file")
args = parser.parse_args()
with open(args.dataset_def, "r") as stream:
dataset_def = yaml.safe_load(stream)
generate_AE_labels(args.rcn_datafile, dataset_def["shots"], 2, args.datadir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment