rkube/postprocess.py

## postprocess.py

import argparse
import pickle
import yaml
import logging
import h5py
from os.path import join
import numpy as np
from tqdm import tqdm
import scipy.sparse as sp_sparse
import cupy as cp
import cupyx.scipy.sparse as cp_sparse

def rcn_infer_cpu(w_in,w_res,w_bi,w_out,leak,r_prev,u):
    # applying input weights to the input.
    # Sparse and dense matrix multiplication is different in Python
    if sp_sparse.issparse(w_in):
        a1 = w_in * u
    else:
        a1=np.dot(w_in, u)
    a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
    r_now = np.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
    r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
    y = np.dot(np.append([1],r_now),w_out) # applying the output weight
    return r_now,y


def rcn_infer_gpu(w_in, w_res, w_bi, w_out, leak, r_prev, u):
    # applying input weights to the input.
    # Sparse and dense matrix multiplication is different in Python
    if cp_sparse.issparse(w_in):
        a1 = w_in * u
    else:
        a1 = cp.dot(w_in, u)
    a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
    r_now = cp.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
    r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
    y = cp.dot(cp.append([1], r_now), w_out) # applying the output weight
    return r_now, y


def generate_AE_labels(rcn_datafile, shotlist, subsample, datadir):
    """Generete Alfven Eignemmode labels

    This method performs Alfven Eigenmode detection using ECE data
    using the RCN algorithm described in:
    (See Jalalvand 2022: https://doi.org/10.1088/1741-4326/ac3be7)

    For each shot, the ECE data is assumed to be stored in an HDF5 file:

    AE_RT_dataset1/xxxxxx.h5, where
    * DS_PATH is the path to the dataset
    * xxxxxx.h5 is the HDF5 file of the shot name

    The fast ECE data are assumed to be in datasets, one for each channel:
    xxxxxx.h5
    |
    |---- /tece01/zdata
    | ...
    |---- /tece40/zdata


    The output of the RCN algorithm is stored in the same h5 file as
    xxxxxx.h5
    |---- /AE_modes---/xdata
                   |--/zdata

    Where xdata is the time-base and zdata a link to the timebase of tece01.

    Args:
        - rcn_datafile (str): Location of the weights to load.
        - shotlist (list[int]): List of shots to process
        - subsample (int): Subsample only every n-th frame
        - datadir (str): Location of the xxxxxx.h5 files
    """
    print("Generating AE labels")

    with open(rcn_datafile, 'rb') as df:
        infer_data = pickle.load(df)
    n_res_l1 = infer_data['layer1']['w_in'].shape[0]
    n_res_l2 = infer_data['layer2']['w_in'].shape[0]

    for shotnr in shotlist[:1]:
        with h5py.File(join(datadir, f"{shotnr:6d}.h5"), "a") as fp:
            tb = fp["tecef01"]["xdata"][:]
            good_idx = (tb > 0.0) & (tb < 5000.0)
            ece_data = np.vstack([fp[f"tecef{i:02d}"]["zdata"][:] for i in range(1, 41)]).T


        tb = tb[good_idx][::subsample]
        ece_data = cp.array(ece_data[good_idx, :][::subsample])

        print(tb.shape, ece_data.shape, ece_data.dtype)
        ae_probs_gpu = cp.zeros([ece_data.shape[0], 5], dtype=np.float32)

        ece_data = cp.array((ece_data - infer_data["mean"]) / infer_data["std"])
        # Initialize to zero, overwrite in for loop
        r_prev = {"layer1": cp.zeros(n_res_l1),
                  "layer2": cp.zeros(n_res_l2)}

        infer_data["layer1"]["w_in"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_in"])
        infer_data["layer1"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_res"])
        infer_data["layer1"]["w_bi"] = cp.array(infer_data["layer1"]["w_bi"])
        infer_data["layer1"]["w_out"] = cp.array(infer_data["layer1"]["w_out"])

        infer_data["layer2"]["w_in"] = cp.array(infer_data["layer2"]["w_in"])
        infer_data["layer2"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer2"]["w_res"])
        infer_data["layer2"]["w_bi"] = cp.array(infer_data["layer2"]["w_bi"])
        infer_data["layer2"]["w_out"] = cp.array(infer_data["layer2"]["w_out"])
        # Iterate over time index 0
        for idx, u in tqdm(enumerate(ece_data)):

            L = "layer1"
            r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
                                         infer_data[L]['w_bi'], infer_data[L]['w_out'],
                                         infer_data[L]['leak_rate'], r_prev[L], u.T)

            L = "layer2"
            r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
                                         infer_data[L]['w_bi'], infer_data[L]['w_out'],
                                         infer_data[L]['leak_rate'], r_prev[L], y)
            ae_probs_gpu[idx, :] = y[:]

        ae_probs = ae_probs_gpu.get()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="postprocess.py",
        description="Post-processes D3D datasets")
    parser.add_argument("--dataset_def", type=str,
        help="YAML file that contains definition of the dataset",
        default="AE_datasets/AE_RT_dataset1.yaml")
    parser.add_argument("--datadir", type=str,
        help="Location of the xxxxxx.h5 files",
        default="AE_RT_dataset1")
    parser.add_argument("--signal_defs_0d", type=str,
        default="d3d_signals/signals_0d.yaml",
        help="YAML file that contains 0d signal informations")
    parser.add_argument("--signal_defs_1d", type=str,
        default="d3d_signals/signals_1d.yaml",
        help="YAML file that contains 1d signal informations")
    parser.add_argument("--rcn_datafile", type=str,
        default="/projects/EKOLEMEN/AE_datasets/1dsignal-model-AE-ECE-RCN.pkl",
        help="Location of RCN data file")

    args = parser.parse_args()

    with open(args.dataset_def, "r") as stream:
        dataset_def = yaml.safe_load(stream)

    generate_AE_labels(args.rcn_datafile, dataset_def["shots"], 2, args.datadir)

	import argparse
	import pickle
	import yaml
	import logging
	import h5py
	from os.path import join
	import numpy as np
	from tqdm import tqdm
	import scipy.sparse as sp_sparse
	import cupy as cp
	import cupyx.scipy.sparse as cp_sparse

	def rcn_infer_cpu(w_in,w_res,w_bi,w_out,leak,r_prev,u):
	# applying input weights to the input.
	# Sparse and dense matrix multiplication is different in Python
	if sp_sparse.issparse(w_in):
	a1 = w_in * u
	else:
	a1=np.dot(w_in, u)
	a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
	r_now = np.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
	r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
	y = np.dot(np.append([1],r_now),w_out) # applying the output weight
	return r_now,y


	def rcn_infer_gpu(w_in, w_res, w_bi, w_out, leak, r_prev, u):
	# applying input weights to the input.
	# Sparse and dense matrix multiplication is different in Python
	if cp_sparse.issparse(w_in):
	a1 = w_in * u
	else:
	a1 = cp.dot(w_in, u)
	a2 = w_res * r_prev # applying recurrent weights to the previous reservoir states
	r_now = cp.tanh(a1 + a2 + w_bi) # adding bias and applying activation function
	r_now = (1 - leak) * r_prev + leak * r_now # applying leak rate
	y = cp.dot(cp.append([1], r_now), w_out) # applying the output weight
	return r_now, y



	def generate_AE_labels(rcn_datafile, shotlist, subsample, datadir):
	"""Generete Alfven Eignemmode labels

	This method performs Alfven Eigenmode detection using ECE data
	using the RCN algorithm described in:
	(See Jalalvand 2022: https://doi.org/10.1088/1741-4326/ac3be7)

	For each shot, the ECE data is assumed to be stored in an HDF5 file:

	AE_RT_dataset1/xxxxxx.h5, where
	* DS_PATH is the path to the dataset
	* xxxxxx.h5 is the HDF5 file of the shot name

	The fast ECE data are assumed to be in datasets, one for each channel:
	xxxxxx.h5
	\|
	\|---- /tece01/zdata
	\| ...
	\|---- /tece40/zdata


	The output of the RCN algorithm is stored in the same h5 file as
	xxxxxx.h5
	\|---- /AE_modes---/xdata
	\|--/zdata

	Where xdata is the time-base and zdata a link to the timebase of tece01.

	Args:
	- rcn_datafile (str): Location of the weights to load.
	- shotlist (list[int]): List of shots to process
	- subsample (int): Subsample only every n-th frame
	- datadir (str): Location of the xxxxxx.h5 files
	"""
	print("Generating AE labels")

	with open(rcn_datafile, 'rb') as df:
	infer_data = pickle.load(df)
	n_res_l1 = infer_data['layer1']['w_in'].shape[0]
	n_res_l2 = infer_data['layer2']['w_in'].shape[0]

	for shotnr in shotlist[:1]:
	with h5py.File(join(datadir, f"{shotnr:6d}.h5"), "a") as fp:
	tb = fp["tecef01"]["xdata"][:]
	good_idx = (tb > 0.0) & (tb < 5000.0)
	ece_data = np.vstack([fp[f"tecef{i:02d}"]["zdata"][:] for i in range(1, 41)]).T


	tb = tb[good_idx][::subsample]
	ece_data = cp.array(ece_data[good_idx, :][::subsample])

	print(tb.shape, ece_data.shape, ece_data.dtype)
	ae_probs_gpu = cp.zeros([ece_data.shape[0], 5], dtype=np.float32)

	ece_data = cp.array((ece_data - infer_data["mean"]) / infer_data["std"])
	# Initialize to zero, overwrite in for loop
	r_prev = {"layer1": cp.zeros(n_res_l1),
	"layer2": cp.zeros(n_res_l2)}

	infer_data["layer1"]["w_in"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_in"])
	infer_data["layer1"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer1"]["w_res"])
	infer_data["layer1"]["w_bi"] = cp.array(infer_data["layer1"]["w_bi"])
	infer_data["layer1"]["w_out"] = cp.array(infer_data["layer1"]["w_out"])

	infer_data["layer2"]["w_in"] = cp.array(infer_data["layer2"]["w_in"])
	infer_data["layer2"]["w_res"] = cp_sparse.csc_matrix(infer_data["layer2"]["w_res"])
	infer_data["layer2"]["w_bi"] = cp.array(infer_data["layer2"]["w_bi"])
	infer_data["layer2"]["w_out"] = cp.array(infer_data["layer2"]["w_out"])
	# Iterate over time index 0
	for idx, u in tqdm(enumerate(ece_data)):

	L = "layer1"
	r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
	infer_data[L]['w_bi'], infer_data[L]['w_out'],
	infer_data[L]['leak_rate'], r_prev[L], u.T)

	L = "layer2"
	r_prev[L], y = rcn_infer_gpu(infer_data[L]['w_in'], infer_data[L]['w_res'],
	infer_data[L]['w_bi'], infer_data[L]['w_out'],
	infer_data[L]['leak_rate'], r_prev[L], y)
	ae_probs_gpu[idx, :] = y[:]

	ae_probs = ae_probs_gpu.get()


	if __name__ == "__main__":
	parser = argparse.ArgumentParser(
	prog="postprocess.py",
	description="Post-processes D3D datasets")
	parser.add_argument("--dataset_def", type=str,
	help="YAML file that contains definition of the dataset",
	default="AE_datasets/AE_RT_dataset1.yaml")
	parser.add_argument("--datadir", type=str,
	help="Location of the xxxxxx.h5 files",
	default="AE_RT_dataset1")
	parser.add_argument("--signal_defs_0d", type=str,
	default="d3d_signals/signals_0d.yaml",
	help="YAML file that contains 0d signal informations")
	parser.add_argument("--signal_defs_1d", type=str,
	default="d3d_signals/signals_1d.yaml",
	help="YAML file that contains 1d signal informations")
	parser.add_argument("--rcn_datafile", type=str,
	default="/projects/EKOLEMEN/AE_datasets/1dsignal-model-AE-ECE-RCN.pkl",
	help="Location of RCN data file")

	args = parser.parse_args()

	with open(args.dataset_def, "r") as stream:
	dataset_def = yaml.safe_load(stream)

	generate_AE_labels(args.rcn_datafile, dataset_def["shots"], 2, args.datadir)