Skip to content

Instantly share code, notes, and snippets.

@yueyericardo
Created June 11, 2021 01:19
Show Gist options
  • Save yueyericardo/613c9c4ac1de3c53284514820e0e79ab to your computer and use it in GitHub Desktop.
Save yueyericardo/613c9c4ac1de3c53284514820e0e79ab to your computer and use it in GitHub Desktop.
from torchani.datasets import AniH5Dataset
import os
import cudf
# Resolve the directory that contains this script. ``__file__`` is not
# defined when the code is run interactively (REPL / notebook cell), so in
# that case fall back to the current working directory.
try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
# Open the sample HDF5 file, located relative to ``path``.
h5file = os.path.join(path, '../dataset/ani-1x/sample.h5')
dataset = AniH5Dataset(h5file)

# Inspect the dataset: all of its keys, then the entries stored under one
# key and that entry's 'species' value.
formula = 'C01H01N03O02'
print(list(dataset.keys()))
print(list(dataset[formula]))
print(dataset[formula]['species'])

# Fetch the conformers for that key and report the shape of each property.
conformers = dataset.get_conformers(formula, raw_output=False)
for prop in ('species', 'coordinates', 'energies', 'forces'):
    print(conformers[prop].shape)

# Build a cudf DataFrame with one row per entry along the first axis of
# 'species'. Coordinates are flattened to a single row per entry so that
# both 'species' and 'coordinates' can be passed to cudf as nested lists.
num_conf = conformers['species'].shape[0]
flat_coordinates = conformers['coordinates'].reshape(num_conf, -1)
df = cudf.DataFrame({'energies': conformers['energies']})
species = cudf.Series(conformers['species'].tolist())
coordinates = cudf.Series(flat_coordinates.tolist())
df['species'] = species
df['coordinates'] = coordinates

# Persist the table as a Parquet file in the current working directory.
df.to_parquet('test.pq')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment