Last active
October 5, 2017 19:42
-
-
Save molpopgen/574a77a658f97fe1414ffbeef80c4a9e to your computer and use it in GitHub Desktop.
WIP for fwdpy11 <-> PyTables interaction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test code writing fwdpy11 | |
# pop data to hdf5 via PyTables. | |
# GOAL IS COMPRESSION!!!! | |
# Instead of using ragged arrays | |
# (not compressable), we use carrays, | |
# and simply concacenate gamete data. | |
# In longer-term, this would be | |
# abstracted into some sort of GameteTable | |
# class that interacts with an hdf5 object. | |
# This totally shows how it is easy to get | |
# a lot of copy/pasted code. There is a lot | |
# of room for abstraction... | |
import tests.quick_pops | |
import tables | |
import numpy as np | |
import pickle | |
import lzma | |
import fwdpy11 | |
import collections | |
import sys | |
COMPLEVEL = 5 | |
def _fillTable(t, d): | |
""" | |
There is a bug in PyTables w.r.to direct | |
insertion from a structured array. The workaround | |
is to manually enter all data. | |
https://github.com/PyTables/PyTables/issues/643 | |
:param t: a tables.Table | |
:param d: a NumPy record array | |
""" | |
keys = d.dtype.names | |
for datum in d: | |
row = t.row | |
for i, key in enumerate(keys): | |
row[key] = datum[i] | |
row.append() | |
def _merge_record_arrays(arrays): | |
""" | |
Merges numpy record arrays | |
:param arrays: tuple of record arrays | |
""" | |
import numpy.lib.recfunctions as rfn | |
return rfn.merge_arrays(arrays, | |
flatten=True, usemask=False) | |
def _write_mutations(h5f, group, tablename, mutation_container, | |
uint_container, | |
uint_container_dtype_name, | |
filters=None): | |
marray = _merge_record_arrays((np.array(mutation_container.array()), | |
np.array(uint_container, | |
dtype=[(uint_container_dtype_name, | |
np.uint32)]))) | |
return h5f.create_table(group, tablename, obj=marray, filters=filters) | |
def _rebuild_mutations(mutation_table): | |
mutations = fwdpy11.MutationContainer() | |
uintvec = fwdpy11.VectorUint32() | |
field = None | |
valid_fields = ['mcounts', 'ftime'] | |
for f in valid_fields: | |
if any(i == f for i in mutation_table.dtype.names) is True: | |
field = f | |
for record in mutation_table.iterrows(): | |
uintvec.append(record[field]) | |
# Things are out-of-order here | |
# b/c of how C++ representation | |
# to get into numpy array | |
# packs the struct nicely | |
m = fwdpy11.Mutation( | |
record['pos'], record['s'], record['h'], | |
record['g'], record['label']) | |
mutations.append(m) | |
if field == 'mcount': | |
rvtype = collections.namedtuple( | |
'MutationTableData', ['mutations', 'mcounts']) | |
rv = rvtype(mutations, uintvec) | |
else: | |
rvtype = collections.namedtuple( | |
'FixationTableData', ['fixations', 'fixation_times']) | |
rv = rvtype(mutations, uintvec) | |
return rv | |
def _write_gamete_mutation_keys(h5f, group, name1, | |
name2, gametes, attribute_name, filters): | |
u32atom = tables.UInt32Atom() | |
# Write length of each gametes neutral muts container | |
n_array = h5f.create_carray(group, name1, u32atom, | |
(len(gametes),), filters=filters) | |
for i in range(len(gametes)): | |
n_array[i] = len(getattr(gametes[i], attribute_name)) | |
n_array.flush() | |
# Write the neutral gamete data to the file | |
nneutral = sum([len(getattr(i, attribute_name)) for i in gametes]) | |
neutral_array = h5f.create_carray(group, name2, u32atom, | |
(nneutral,), filters=filters) | |
dummy = 0 | |
for i in gametes: | |
neutral_array[dummy:(dummy + len(getattr(i, attribute_name))) | |
] = np.array(getattr(i, attribute_name)) | |
dummy += len(getattr(i, attribute_name)) | |
if dummy != nneutral: | |
raise RuntimeError("inconsistent internal state") | |
neutral_array.flush() | |
def _write_gametes(h5f, group, gametes, filters): | |
# Write number of occurrences of each gamete to file | |
u32atom = tables.UInt32Atom() | |
n_array = h5f.create_carray(group, 'gamete_n', u32atom, | |
(len(gametes),), filters=filters) | |
for i in range(len(gametes)): | |
n_array[i] = gametes[i].n | |
n_array.flush() | |
_write_gamete_mutation_keys( | |
h5f, | |
group, 'gamete_num_neutral', | |
'gamete_mutations', gametes, 'mutations', filters) | |
_write_gamete_mutation_keys( | |
h5f, | |
group, 'gamete_num_selected', | |
'gamete_smutations', gametes, 'smutations', filters) | |
def _read_gamete_num_mutations(group, name): | |
if hasattr(group, name) is False: | |
raise ValueError("path does not exist") | |
n = getattr(group, name) | |
rv = fwdpy11.VectorUint32() | |
for i in n: | |
rv.append(i) | |
return rv | |
def _read_gametes(group): | |
nneutral_per_gam = _read_gamete_num_mutations( | |
group, 'gamete_num_neutral') | |
nselected_per_gam = _read_gamete_num_mutations( | |
group, 'gamete_num_selected') | |
gamete_n = _read_gamete_num_mutations(group, 'gamete_n') | |
if len(set([len(nneutral_per_gam), | |
len(nselected_per_gam), | |
len(gamete_n)])) != 1: | |
raise RuntimeError("inconsistent container lengths") | |
gametes = fwdpy11.GameteContainer() | |
ndata = getattr(group, 'gamete_mutations') | |
sdata = getattr(group, 'gamete_smutations') | |
ndummy, sdummy = 0, 0 | |
for neut, sel, n in zip(nneutral_per_gam, nselected_per_gam, gamete_n): | |
tn = fwdpy11.VectorUint32() | |
ts = fwdpy11.VectorUint32() | |
for i in ndata[ndummy:(ndummy + neut)]: | |
tn.append(i) | |
for i in sdata[sdummy:(sdummy + sel)]: | |
ts.append(i) | |
gametes.append(fwdpy11.Gamete((n, tn, ts))) | |
ndummy += neut | |
sdummy += sel | |
assert(ndummy == len(ndata)) | |
assert(sdummy == len(sdata)) | |
return gametes | |
def _write_slocus_diploids(h5f, group, diploids, filters): | |
g = h5f.create_group(group, 'diploids') | |
t = h5f.create_table(g, 'traits', obj=np.array(diploids.trait_array())) | |
t.flush() | |
t = h5f.create_table(g, 'keys', obj=np.array(diploids.key_array())) | |
t.flush() | |
def _read_slocus_diploids(path): | |
trait_path = getattr(path, 'traits') | |
key_path = getattr(path, 'keys') | |
# "typedef" | |
Diploid = fwdpy11.SingleLocusDiploid | |
if len(trait_path) == len(key_path): | |
rv = fwdpy11.DiploidContainer() | |
for i, j in zip(enumerate(trait_path.iterrows()), key_path): | |
rv.append(Diploid.create(j['first'], | |
j['second'], | |
i[0], i[1]['g'], | |
i[1]['e'], i[1]['w'] | |
)) | |
return rv | |
def _write_generation(h5f, group, g, filters): | |
carray = h5f.create_carray(group, 'generation', obj=g, | |
filters=filters) | |
carray.flush() | |
PopHDF = collections.namedtuple('PopHDF', ['poptype', 'pathname']) | |
def h5write_pop(pop, h5f, group, filters=None): | |
# Does the group exist? | |
if hasattr(h5f, group) is False: | |
group = h5f.create_group(h5f.root, group, filters=filters) | |
if isinstance(pop, fwdpy11.SlocusPop): | |
_write_generation(h5f, group, np.array( | |
[pop.N] * 1, dtype=np.uint32), filters) | |
_write_slocus_diploids(h5f, group, pop.diploids, filters) | |
gamete_group = h5f.create_group(group, 'gametes') | |
_write_gametes(h5f, gamete_group, pop.gametes, filters) | |
mtab = _write_mutations(h5f, | |
group, 'mutations', | |
pop.mutations, pop.mcounts, | |
'mcounts', | |
filters=filters) | |
mtab.flush() | |
ftab = _write_mutations(h5f, group, 'fixations', | |
pop.fixations, pop.fixation_times, | |
'ftime', | |
filters=filters) | |
ftab.flush() | |
return PopHDF(pop.__class__, group._v_pathname) | |
else: | |
raise NotImplementedError("unsupported population type") | |
def h5read_pop(h5f, pophdf): | |
path = h5f.get_node(h5f.root, pophdf.pathname) | |
mut_root = h5f.get_node(path, 'mutations') | |
mutations, mcounts = _rebuild_mutations(mut_root) | |
fix_root = h5f.get_node(path, 'fixations') | |
fixations, fixation_times = _rebuild_mutations(fix_root) | |
dip_root = h5f.get_node(path, 'diploids') | |
diploids = _read_slocus_diploids(dip_root) | |
gam_root = h5f.get_node(path, 'gametes') | |
gametes = _read_gametes(gam_root) | |
return pophdf.poptype.create_with_fixations( | |
diploids, gametes, mutations, fixations, fixation_times, 0) | |
if __name__ == "__main__": | |
pop = tests.quick_pops.quick_nonneutral_slocus(N=1000, simlen=10000) | |
h5f = tables.open_file("population.h5", 'w', | |
filters=tables.Filters(complevel=COMPLEVEL, | |
complib='zlib')) | |
global_filters = tables.Filters(complevel=COMPLEVEL, complib='zlib') | |
pw = h5write_pop(pop, h5f, 'inner_group', global_filters) | |
h5f.close() | |
h5f = tables.open_file("population.h5", 'r') | |
pop2 = h5read_pop(h5f, pw) | |
h5f.close() | |
assert(pop == pop2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment