suvarchal/Readme.md

## Readme.md

      
    Raw
  

              Readme.md
            
          
    Testing hecuba API used in FESOM2

1. Load any relevant modules and export HECUBA_ROOT

export HECUBA_ROOT=/path/to/hecuba/root
2. Start Cassandra using your method of choice. For testing I usually run it with Docker as shown below; the --memory limit is optional.

docker run -it -p 7000:7000 -p 7199:7199 -p 9042:9042 -p 9160:9160 --memory 8g cassandra:4.0
3. Build a shared library, similar to the one used in FESOM, so that the connector can be called from C. Use your compiler of choice (e.g. mpic++) instead of g++ below.

g++ -fpic -shared hecuba_c_connector.cpp -I ${HECUBA_ROOT}/include -L${HECUBA_ROOT}/lib  -lhfetch   -Wl,-rpath,${HECUBA_ROOT}/lib -o libhconn.so
4. Compile the test program with your C compiler of choice, link it against libhconn, and set an rpath so that we don't have to set LD_LIBRARY_PATH=$(pwd).

gcc hecuba_api_test.c -L. -lhconn -Wl,-rpath,$(pwd)
5. run the test

./a.out

  
## compile_and_run.sh

# Build the Hecuba C connector and run the small C test program against a
# local Cassandra instance.

# 1. Load any relevant modules and export HECUBA_ROOT (edit the path below).
export HECUBA_ROOT=/path/to/hecuba/root

# 2. Start Cassandra using your method of choice; for testing, Docker is used
#    here. The container is started detached (-d) so that this script goes on
#    to the build and test steps instead of blocking on the container's
#    foreground process. The --memory limit is optional.
#    NOTE: Cassandra needs some time after start-up before it accepts
#    connections; if step 5 cannot connect, wait a little and run it again.
docker run -d -p 7000:7000 -p 7199:7199 -p 9042:9042 -p 9160:9160 --memory 8g cassandra:4.0

# 3. Build a shared library, similar to the one used in FESOM, so that the
#    connector can be called from C. Use your compiler of choice (e.g. mpic++)
#    instead of g++ below. Paths are quoted so they may contain spaces.
g++ -fpic -shared hecuba_c_connector.cpp -I "${HECUBA_ROOT}/include" -L"${HECUBA_ROOT}/lib" -lhfetch -Wl,-rpath,"${HECUBA_ROOT}/lib" -o libhconn.so

# 4. Compile the test program with your C compiler of choice, link it against
#    libhconn, and set an rpath so that LD_LIBRARY_PATH=$(pwd) is not needed.
gcc hecuba_api_test.c -L. -lhconn -Wl,-rpath,"$(pwd)"

# 5. Run the test.
./a.out

## hecuba_api_test.c
#include <stdio.h>
#include "hecuba_c_connector.h"

// Smoke test for the Hecuba C connector: starts a session, loads the data
// model and stores a 16-element array of doubles under the name "ftest".
int main() {
    double data[16] = {101.,102.,104.,140.,109., 101.,102.,104.,140.,109., 101.,102.,104.,140.,109., 999.};
    // Number of elements, not bytes: hecuba_put_array_val_C2 uses this value
    // as a dimension length when it builds the array metadata.
    long unsigned int size = sizeof(data) / sizeof(data[0]);

    start_hecuba_session();
    load_datamodel();
    printf("size of data %lu\n", size);
    hecuba_put_array_val_C("ftest", data, &size);

    // Keyed variant, currently disabled:
    //   float ctime = 10.1;
    //   int chunk = 0;
    //   hecuba_put_array_val_C2("ftest4", ctime, chunk, data, &size);

    return 0;
}


## hecuba_c_connector.cpp
#include "hecuba_c_connector.h"

#include <cstdlib>
#include <cstring>
#include <iostream>

#include <hecuba/HecubaSession.h>
#include <hecuba/IStorage.h>
#include <hecuba/UUID.h>

#ifdef __cplusplus
extern "C" {
#endif

// static shared across a session
static HecubaSession *hsession = NULL;

// Creates the HecubaSession stored in the file-level `hsession` pointer.
// If a session already exists the call does nothing, so a repeated call no
// longer overwrites (and leaks) the previously created session object.
void start_hecuba_session() {
    if (hsession) {
        return;
    }
    std::cout<< "+ STARTING C++ APP"<<std::endl;
    hsession = new HecubaSession();
    std::cout<< "+ Session started"<<std::endl;
}


// Makes sure a session exists (starting one when `hsession` is still NULL)
// and then loads the data model from model_class.yaml / model_class.py.
void load_datamodel(){
    const bool session_ready = (hsession != NULL);
    if (session_ready) {
        std::cout << "Session exists" << std::endl;
    } else {
        start_hecuba_session();
    }

    // Both file names are resolved by loadDataModel itself.
    hsession->loadDataModel("model_class.yaml", "model_class.py");
    std::cout << "+ Data Model loaded" << std::endl;
}


// Packs a (ctime, chunk) pair into a freshly malloc'ed buffer:
// sizeof(float) bytes holding ctime, immediately followed by sizeof(int)
// bytes holding chunk.
//
// Returns the buffer, or NULL when the allocation fails. The buffer is not
// freed here; releasing it is left to the caller.
char * generateKey(float ctime, int chunk) {
    char * key = static_cast<char*>(std::malloc(sizeof(float) + sizeof(int)));
    if (key == NULL) {
        std::cerr << "generateKey: out of memory" << std::endl;
        return NULL;
    }

    // memcpy copies the raw bytes of each field without casting the char
    // buffer to float* / int*.
    std::memcpy(key, &ctime, sizeof(float));
    std::memcpy(key + sizeof(float), &chunk, sizeof(int));

    std::cout << " generatekey sizeof(float) "<< sizeof(float) << " sizeof(int) " << sizeof(int)<< std::endl;
    return key;
}

// Builds the array metadata block: three unsigned ints in a malloc'ed
// buffer, where entry 0 is the number of dimensions and entries 1 and 2 are
// the lengths of the first and second dimension.
// The shape is currently fixed to 2 dimensions of 4 x 4; `arrsize` and
// `dtypesize` are accepted but not used.
// Author's notes: the payload ends up as a blob either way, so a 1-d shape
// would do unless performance matters; sending columns first and rows second
// can be used to get F-ordering.
char * generateMetas(size_t arrsize, int dtypesize) {
    (void) arrsize;    // unused: shape is hard-coded below
    (void) dtypesize;  // unused: shape is hard-coded below

    unsigned int * shape_header = (unsigned int *) malloc(3 * sizeof(unsigned int));
    unsigned int * slot = shape_header;
    *slot++ = 2;  // number of dimensions
    *slot++ = 4;  // length of the first dimension
    *slot   = 4;  // length of the second dimension

    return (char *) shape_header;
}
// Builds the array metadata block for a 1-d array of `array_len` elements,
// described as a 2-d shape of 1 x array_len (a single row).
// Layout: three unsigned ints in a malloc'ed buffer - number of dimensions,
// then the lengths of the first and second dimension.
// The alternative shape array_len x 1 would turn the data into one column
// per element when cast to a list, like [[data[0]], [data[1]], ...].
char * generateMetas2(int array_len) {
    unsigned int * layout = (unsigned int *) malloc(3 * sizeof(unsigned int));

    layout[2] = array_len;  // length of the second dimension
    layout[1] = 1;          // length of the first dimension
    layout[0] = 2;          // number of dimensions

    return (char *) layout;
}

// Stores the array pointed to by `valueC` as a "hecuba.hnumpy.StorageNumpy"
// object named `key` and syncs it.
//
// key      - name of the object to create
// valueC   - pointer to the array data handed to createObject
// arr_size - pointer to the array size; forwarded to generateMetas together
//            with sizeof(float). generateMetas currently ignores both values
//            and always describes a 4 x 4 array.
//
// A NULL argument is reported on stderr and nothing is stored.
// TODO: add a dtype size parameter.
// NOTE(review): ownership of the metadata buffer after createObject is not
// visible here, so it is not freed - confirm against the Hecuba API.
void hecuba_put_array_val_C(char *key, void *valueC, long unsigned int *arr_size){
   if (key == NULL || valueC == NULL || arr_size == NULL) {
      std::cerr << "hecuba_put_array_val_C: NULL argument, nothing stored" << std::endl;
      return;
   }

   load_datamodel();
   char * numpymeta = generateMetas(*arr_size, sizeof(float));

   IStorage *mi_sn = hsession->createObject("hecuba.hnumpy.StorageNumpy", key, numpymeta, valueC);
   mi_sn->sync();
}

// Stores the array pointed to by `valueC` as a StorageNumpy object with a
// generated unique name, then records it in the dictionary object named
// `varname` under the key (ctime, chunk).
//
// varname    - name of the dictionary object; also the prefix of the
//              generated array name
// ctime      - time part of the key; stored as float by generateKey
// chunk      - chunk part of the key
// valueC     - pointer to the array data handed to createObject
// array_size - pointer to the array length; used as the length of the second
//              dimension (the shape is 1 x *array_size)
//
// A NULL argument is reported on stderr and nothing is stored.
// TODO: add a dtype size parameter.
// NOTE(review): ownership of the metadata and key buffers after
// createObject/setItem is not visible here, so they are not freed - confirm
// against the Hecuba API.
void hecuba_put_array_val_C2(char *varname, double ctime, int chunk, void *valueC, long unsigned int *array_size){
   if (varname == NULL || valueC == NULL || array_size == NULL) {
      std::cerr << "hecuba_put_array_val_C2: NULL argument, nothing stored" << std::endl;
      return;
   }

   load_datamodel();
   char * numpymeta = generateMetas2(static_cast<int>(*array_size));

   // Reusing the same name at another instance/time would replace the table,
   // so a unique name is generated (alternatively ctime and chunk could be
   // appended to varname).
   // Using the full UUID string as the name failed with a "no metas found"
   // error, and names containing '-' or '.' did not work either, so only the
   // last 12 characters of the UUID string are appended.
   std::string random_str = varname;
   random_str.append("_");
   random_str.append((UUID::UUID2str(UUID::generateUUID())).substr(24));
   char * random_key = &random_str[0];

   std::cout<< "+ random_key: "<<random_key<<std::endl;

   IStorage *mi_sn = hsession->createObject("hecuba.hnumpy.StorageNumpy", random_key, numpymeta, valueC);
   mi_sn->sync();
   std::cout<< "+ done_sync: "<<random_key<<std::endl;

   // The dictionary class declared in model_class.yaml / model_class.py is
   // "mydict", so that is the model name that has to be requested here.
   // usually doesn't replace; better read as get by alias
   IStorage *var = hsession->createObject("mydict", varname);

   char * key = generateKey(static_cast<float>(ctime), chunk);
   if (key == NULL) {
      std::cerr << "hecuba_put_array_val_C2: could not build key, entry not stored" << std::endl;
      return;
   }
   var->setItem(static_cast<void*>(key), mi_sn);
   var->sync();
}

#ifdef __cplusplus
}
#endif

//int main(){
//  start_hecuba_session();
//  load_datamodel();
//}

## hecuba_c_connector.h
#ifndef HECUBA_C_CONNECTOR_H
#define HECUBA_C_CONNECTOR_H

#ifdef __cplusplus
extern "C" {
#endif

/* C-callable entry points of the Hecuba connector.
 * Parameterless functions are declared with (void): in C an empty parameter
 * list leaves the parameters unspecified, so calls with stray arguments would
 * not be diagnosed. */

/* Creates the Hecuba session used by the other calls. */
void start_hecuba_session(void);

/* Starts a session if none exists yet and loads model_class.yaml / model_class.py. */
void load_datamodel(void);

/* Stores the array `valueC` as a StorageNumpy object named `key`. */
void hecuba_put_array_val_C(char *key, void *valueC, long unsigned int *arr_size);

/* Stores the array `valueC` under a generated name and records it in the
 * dictionary `varname` under the key (ctime, chunk). */
void hecuba_put_array_val_C2(char *varname, double ctime, int chunk, void *valueC, long unsigned int *arr_size);

#ifdef __cplusplus
}
#endif


#endif


## model_class.py
from hecuba import StorageDict

# Hecuba data-model class: a StorageDict whose key is (lat: float, ts: int)
# and whose value is a numpy array named "metrics", as declared by the
# @TypeSpec line in the docstring.
# NOTE(review): the docstring is presumably read by Hecuba to obtain the
# schema - confirm before rewording it.
class mydict (StorageDict):
   '''
   @TypeSpec dict <<lat:float,ts:int>,metrics:numpy.ndarray>
   '''

## model_class.yaml
- "TypeSpec": [ "mydict", "StorageDict" ]
  "KeySpec":
            - [ "lat", "float"]     # the first key is the partition key
            - [ "ts", "int" ]       # the remaining keys are the clustering keys
  "ValueSpec":
            - [ "metrics", "numpy.ndarray" ]
#
# This model will generate the following Python file:
#    from hecuba import StorageDict
#
#    class mydict (StorageDict):
#       '''
#       @TypeSpec dict <<lat:float,ts:int>,metrics:numpy.ndarray>
#       '''

	# Build the Hecuba C connector and run the small C test program against a
	# local Cassandra instance.

	# 1. Load any relevant modules and export HECUBA_ROOT (edit the path below).
	export HECUBA_ROOT=/path/to/hecuba/root

	# 2. Start Cassandra using your method of choice; for testing, Docker is used
	#    here. The container is started detached (-d) so that this script goes on
	#    to the build and test steps instead of blocking on the container's
	#    foreground process. The --memory limit is optional.
	#    NOTE: Cassandra needs some time after start-up before it accepts
	#    connections; if step 5 cannot connect, wait a little and run it again.
	docker run -d -p 7000:7000 -p 7199:7199 -p 9042:9042 -p 9160:9160 --memory 8g cassandra:4.0

	# 3. Build a shared library, similar to the one used in FESOM, so that the
	#    connector can be called from C. Use your compiler of choice (e.g. mpic++)
	#    instead of g++ below. Paths are quoted so they may contain spaces.
	g++ -fpic -shared hecuba_c_connector.cpp -I "${HECUBA_ROOT}/include" -L"${HECUBA_ROOT}/lib" -lhfetch -Wl,-rpath,"${HECUBA_ROOT}/lib" -o libhconn.so

	# 4. Compile the test program with your C compiler of choice, link it against
	#    libhconn, and set an rpath so that LD_LIBRARY_PATH=$(pwd) is not needed.
	gcc hecuba_api_test.c -L. -lhconn -Wl,-rpath,"$(pwd)"

	# 5. Run the test.
	./a.out
	#include <stdio.h>
	#include "hecuba_c_connector.h"

	// Smoke test for the Hecuba C connector: starts a session, loads the data
	// model and stores a 16-element array of doubles under the name "ftest".
	int main() {
	    double data[16] = {101.,102.,104.,140.,109., 101.,102.,104.,140.,109., 101.,102.,104.,140.,109., 999.};
	    // Number of elements, not bytes: hecuba_put_array_val_C2 uses this value
	    // as a dimension length when it builds the array metadata.
	    long unsigned int size = sizeof(data) / sizeof(data[0]);

	    start_hecuba_session();
	    load_datamodel();
	    printf("size of data %lu\n", size);
	    hecuba_put_array_val_C("ftest", data, &size);

	    // Keyed variant, currently disabled:
	    //   float ctime = 10.1;
	    //   int chunk = 0;
	    //   hecuba_put_array_val_C2("ftest4", ctime, chunk, data, &size);

	    return 0;
	}
	#include <hecuba/HecubaSession.h>
	#include <hecuba/IStorage.h>
	#include <hecuba/UUID.h>
	#include <iostream>
	#include "hecuba_c_connector.h"

	#ifdef __cplusplus
	extern "C" {
	#endif

	// static shared across a session
	static HecubaSession *hsession = NULL;

	// Creates the HecubaSession stored in the file-level `hsession` pointer.
	// If a session already exists the call does nothing, so a repeated call no
	// longer overwrites (and leaks) the previously created session object.
	void start_hecuba_session() {
	    if (hsession) {
	        return;
	    }
	    std::cout<< "+ STARTING C++ APP"<<std::endl;
	    hsession = new HecubaSession();
	    std::cout<< "+ Session started"<<std::endl;
	}


	// Makes sure a session exists (starting one when `hsession` is still NULL)
	// and then loads the data model from model_class.yaml / model_class.py.
	void load_datamodel(){
	    const bool session_ready = (hsession != NULL);
	    if (session_ready) {
	        std::cout << "Session exists" << std::endl;
	    } else {
	        start_hecuba_session();
	    }

	    // Both file names are resolved by loadDataModel itself.
	    hsession->loadDataModel("model_class.yaml", "model_class.py");
	    std::cout << "+ Data Model loaded" << std::endl;
	}


	// Packs a (ctime, chunk) pair into a freshly malloc'ed buffer:
	// sizeof(float) bytes holding ctime, immediately followed by sizeof(int)
	// bytes holding chunk.
	//
	// Returns the buffer, or NULL when the allocation fails. The buffer is not
	// freed here; releasing it is left to the caller.
	char * generateKey(float ctime, int chunk) {
	    char * key = (char*) malloc (sizeof(float) + sizeof(int));
	    if (key == NULL) {
	        std::cerr << "generateKey: out of memory" << std::endl;
	        return NULL;
	    }

	    // The fields are written through typed pointers into the buffer; the
	    // pointer declarators had been lost, which made the code uncompilable.
	    float *time_key = (float*) key;
	    *time_key = ctime;
	    int *chunk_key = (int*) (key + sizeof(float));
	    *chunk_key = chunk;

	    std::cout << " generatekey sizeof(float) "<< sizeof(float) << " sizeof(int) " << sizeof(int)<< std::endl;
	    return key;
	}

	// Builds the array metadata block: three unsigned ints in a malloc'ed
	// buffer, where entry 0 is the number of dimensions and entries 1 and 2 are
	// the lengths of the first and second dimension.
	// The shape is currently fixed to 2 dimensions of 4 x 4; `arrsize` and
	// `dtypesize` are accepted but not used.
	// Author's notes: the payload ends up as a blob either way, so a 1-d shape
	// would do unless performance matters; sending columns first and rows second
	// can be used to get F-ordering.
	char * generateMetas(size_t arrsize, int dtypesize) {
	    (void) arrsize;    // unused: shape is hard-coded below
	    (void) dtypesize;  // unused: shape is hard-coded below

	    // Pointer cast and multiplication restored: the '*' characters had been
	    // lost, which made this statement uncompilable.
	    unsigned int * metas = (unsigned int *) malloc(sizeof(unsigned int) * 3);
	    metas[0]=2; // number of dimensions
	    metas[1]=4; // length of the first dimension
	    metas[2]=4; // length of the second dimension

	    return (char *) metas;
	}
	// Builds the array metadata block for a 1-d array of `array_len` elements,
	// described as a 2-d shape of 1 x array_len (a single row).
	// Layout: three unsigned ints in a malloc'ed buffer - number of dimensions,
	// then the lengths of the first and second dimension.
	// The alternative shape array_len x 1 would turn the data into one column
	// per element when cast to a list, like [[data[0]], [data[1]], ...].
	char * generateMetas2(int array_len) {
	    // Pointer cast and multiplication restored: the '*' characters had been
	    // lost, which made this statement uncompilable.
	    unsigned int * metas = (unsigned int *) malloc(sizeof(unsigned int) * 3);
	    metas[0]=2;         // number of dimensions
	    metas[1]=1;         // length of the first dimension
	    metas[2]=array_len; // length of the second dimension

	    return (char *) metas;
	}

	//add dtype size
	void hecuba_put_array_val_C(char key, void valueC, long unsigned int *arr_size){
	char * numpymeta;
	load_datamodel();
	numpymeta = generateMetas(*arr_size, sizeof(float));

	IStorage mi_sn = (hsession).createObject("hecuba.hnumpy.StorageNumpy",key, numpymeta,valueC);
	(*mi_sn).sync();
	}

	//add dtype size
	void hecuba_put_array_val_C2(char varname, double ctime, int chunk, void valueC, long unsigned int *array_size){
	char * numpymeta;
	char * key;
	char * random_key;

	load_datamodel();
	numpymeta = generateMetas2(*array_size);

	//if we use same key in another instance/time then the table will be replaced so
	//generate a random name,
	//optionally can just add ctime, chunk info to varname and use it

	//std::string random_str = (hsession->UUID2str(hsession->generateUUID()));
	// if we use above UUID then it complains error as no metas found, some how
	// (i am guessing if there is checking for -, because using aaa-bbb as key wont work,
	// also . wont work)
	// it knows it is a UUID string so making substring using last 12 chars
	std::string random_str = varname;
	//use substring
	random_str.append("_");
	//random_str.append((hsession->UUID2str(hsession->generateUUID())).substr(24));
	random_str.append((UUID::UUID2str(UUID::generateUUID())).substr(24));
	//cast as char array
	random_key= &random_str[0];

	std::cout<< "+ random_key: "<<random_key<<std::endl;

	IStorage mi_sn = (hsession).createObject("hecuba.hnumpy.StorageNumpy",random_key, numpymeta,valueC);

	(*mi_sn).sync(); //delay sync
	std::cout<< "+ done_sync: "<<random_key<<std::endl;
	// usually doesn't replace better read as get by alias
	IStorage var = (hsession).createObject("midict", varname);
	key = generateKey(ctime, chunk);
	var->setItem((void*)key, mi_sn);
	var->sync();
	}

	#ifdef __cplusplus
	}
	#endif

	//int main(){
	// start_hecuba_session();
	// load_datamodel();
	//}
	#ifndef HECUBA_C_CONNECTOR_H
	#define HECUBA_C_CONNECTOR_H

	#ifdef __cplusplus
	extern "C" {
	#endif

	/* C-callable entry points of the Hecuba connector.
	 * The pointer declarators of the string and data parameters had been lost
	 * and are restored. Parameterless functions are declared with (void),
	 * because in C an empty parameter list leaves the parameters unspecified. */

	/* Creates the Hecuba session used by the other calls. */
	void start_hecuba_session(void);

	/* Starts a session if none exists yet and loads the data model. */
	void load_datamodel(void);

	/* Stores the array `valueC` as a StorageNumpy object named `key`. */
	void hecuba_put_array_val_C(char *key, void *valueC, long unsigned int *arr_size);

	/* Stores the array `valueC` under a generated name and records it in the
	 * dictionary `varname` under the key (ctime, chunk). */
	void hecuba_put_array_val_C2(char *varname, double ctime, int chunk, void *valueC, long unsigned int *arr_size);

	#ifdef __cplusplus
	}
	#endif


	#endif
	from hecuba import StorageDict

	# Hecuba data-model class: a StorageDict whose key is (lat: float, ts: int)
	# and whose value is a numpy array named "metrics", as declared by the
	# @TypeSpec line in the docstring.
	# The docstring is indented again so that it forms the class body; without
	# the indentation the class statement has no body and does not parse.
	class mydict (StorageDict):
	   '''
	   @TypeSpec dict <<lat:float,ts:int>,metrics:numpy.ndarray>
	   '''
	# Nesting restored: "KeySpec" and "ValueSpec" belong to the same list entry
	# as "TypeSpec", and their items are nested one level below them.
	- "TypeSpec": [ "mydict", "StorageDict" ]
	  "KeySpec":
	    - [ "lat", "float"]     # the first key is the partition key
	    - [ "ts", "int" ]       # the remaining keys are the clustering keys
	  "ValueSpec":
	    - [ "metrics", "numpy.ndarray" ]
	#
	# This model will generate the following Python file:
	#    from hecuba import StorageDict
	#
	#    class mydict (StorageDict):
	#       '''
	#       @TypeSpec dict <<lat:float,ts:int>,metrics:numpy.ndarray>
	#       '''