Skip to content

Instantly share code, notes, and snippets.

@mthomure
Last active May 9, 2017 15:40
Show Gist options
  • Save mthomure/9ec66f491dbfeb1599c9189e6e12ef08 to your computer and use it in GitHub Desktop.
Save mthomure/9ec66f491dbfeb1599c9189e6e12ef08 to your computer and use it in GitHub Desktop.
Simple HDF5 reader in clojure
(ns hdf5
(:import [org.bytedeco.javacpp
DoublePointer FloatPointer
LongPointer IntPointer
ShortPointer BytePointer]
[org.bytedeco.javacpp hdf5 hdf5$H5File]))
;; Using maven dependency:
;; [org.bytedeco.javacpp-presets/hdf5-platform "1.10.0-patch1-1.3"]
;; C++ API Reference
;; https://support.hdfgroup.org/HDF5/doc/cpplus_RM/
;; Example: discovering contents of file
;; https://support.hdfgroup.org/ftp/HDF5/examples/misc-examples/h5_info.c
(defn hdf5-file
  "Constructs an `H5File` for `path`, passing `H5F_ACC_RDONLY` as the
  access flag, and returns it."
  [path]
  (new hdf5$H5File path hdf5/H5F_ACC_RDONLY))
(defn dataset-names
  "Returns a seq with the name of every object reported by the
  `CommonFG` view of `file`, in index order (0 .. num-objs - 1).

  The result is fully realized before returning. Each element is
  produced by a native call on the file handle, so a lazy seq could
  end up being evaluated after the caller has already closed the file."
  [file]
  (let [fg (.asCommonFG file)
        num-objs (.getNumObjs fg)]
    ;; doall forces every native lookup now, while `file` is known to be open.
    (doall
      (map #(-> fg (.getObjnameByIdx %) .getString)
           (range num-objs)))))
(defn dataset
  "Opens the data set called `name` through the `CommonFG` view of
  `file` and returns the resulting object."
  [file name]
  (let [group-view (.asCommonFG file)]
    (.openDataSet group-view name)))
(defn- ptr->array
  "Copies the contents of a JavaCPP pointer into a freshly allocated
  primitive array and returns that array.

  The array constructor is chosen from the concrete pointer class and
  the array length is the pointer's capacity. A pointer of any other
  class makes `condp` throw IllegalArgumentException."
  [ptr]
  (let [alloc (condp instance? ptr
                DoublePointer double-array
                FloatPointer  float-array
                LongPointer   long-array
                IntPointer    int-array
                ShortPointer  short-array
                BytePointer   byte-array)
        n     (.capacity ptr)
        out   (alloc n)]
    ;; .get fills `out` in place; its own return value is not needed.
    (.get ptr out)
    out))
(defn- with-ptr
  "Allocates a JavaCPP pointer with room for `capacity` elements, calls
  `f` on it for side effects, and returns the pointer.

  `type` selects the pointer class and must be one of the core coercion
  functions `double`, `float`, `long`, `int`, `short` or `byte`
  (compared with `=`). Any other value makes `condp` throw
  IllegalArgumentException."
  [^long capacity type f]
  ;; doto calls (f ptr) and then yields ptr itself, discarding f's result.
  (doto (condp = type
          double (DoublePointer. capacity)
          float  (FloatPointer. capacity)
          long   (LongPointer. capacity)
          int    (IntPointer. capacity)
          short  (ShortPointer. capacity)
          byte   (BytePointer. capacity))
    f))
(defn dimensions
  "Returns a primitive array holding the extent of `dataset` along each
  of its dimensions, as reported by `getSimpleExtentDims`.

  The element type of the array follows `H5_SIZEOF_HSIZE_T`: longs when
  it is 8, ints when it is 4. Any other value makes `case` throw
  IllegalArgumentException."
  [dataset]
  (let [;; Fetch the dataspace once and reuse it for both queries instead
        ;; of creating a second native DataSpace object.
        space (.getSpace dataset)
        ndim  (.getSimpleExtentNdims space)
        type  (case hdf5/H5_SIZEOF_HSIZE_T
                8 long
                4 int)]
    (ptr->array
      (with-ptr ndim type #(.getSimpleExtentDims space %)))))
(defn ->array
  "Reads every element of `dataset` into a flat primitive array and
  returns it.

  The array type is chosen from the data set's type class and element
  size in bytes: FLOAT 8 -> doubles, FLOAT 4 -> floats, INTEGER 8 ->
  longs, INTEGER 4 -> ints, INTEGER 2 -> shorts, INTEGER 1 -> bytes.
  Any other combination throws IllegalArgumentException.

  A data set with a zero-length dimension yields an empty array."
  [dataset]
  (let [mem-size    (.getInMemDataSize dataset)
        abs-dataset (.asAbstractDs dataset)
        data-type   (.getTypeClass abs-dataset)
        ;; Product of all extents; 1 for a data set with no dimensions.
        nelem       (reduce * (dimensions dataset))
        ;; Dividing by nelem would throw ArithmeticException for an empty
        ;; data set, so in that case ask the datatype for its size instead.
        dtype-size  (if (zero? nelem)
                      (.getSize (.getDataType abs-dataset))
                      (quot mem-size nelem))
        type-key    [data-type dtype-size]
        type        (condp = type-key
                      [hdf5/H5T_FLOAT 8]   double
                      [hdf5/H5T_FLOAT 4]   float
                      [hdf5/H5T_INTEGER 8] long
                      [hdf5/H5T_INTEGER 4] int
                      [hdf5/H5T_INTEGER 2] short
                      [hdf5/H5T_INTEGER 1] byte
                      (throw (IllegalArgumentException.
                               (str "Unsupported HDF5 type class/size: "
                                    type-key))))]
    (ptr->array
      (with-ptr nelem type
        (fn [ptr]
          ;; Nothing to transfer for an empty data set; skip the native read.
          (when (pos? nelem)
            (.read dataset ptr (.getDataType abs-dataset))))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment