Last active
April 11, 2021 11:00
-
-
Save cstein/a0d8ccb623abd234fc7f950f93f6a49d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Builds a string of alpha-1,4-glucose residue names compatible with our | |
efforts at AAU to build a GLYCAM-like force field | |
author: Casper Steinmann | |
email: css@bio.aau.dk | |
""" | |
import argparse | |
import numpy | |
import numpy.random | |
rng_engine = numpy.random.default_rng() | |
EPS = 1.0e-6 | |
RES = {0: ["A"], 1: ["X", "Y", "Z"], 2: ["R", "S", "T"], 3: ["W"]} | |
RES_P = [0.1, 0.4, 0.3, 0.2] | |
assert sum(RES_P) - 1.0 < EPS | |
if __name__ == '__main__': | |
ap = argparse.ArgumentParser() | |
ap.add_argument("-s", dest="size", type=int, metavar="size", default=7, choices=("7"), help="Number of sugars in a ring. Default is %(default)s") | |
ap.add_argument("-d", dest="ds", type=int, metavar="ds", default=4, help="Degree of substitution. Default is %(default)s") | |
# constants for use with algorithm | |
args = ap.parse_args() | |
N_CD = args.size # size of CD ring | |
DS = args.ds # degree of substitutions | |
max_ds = N_CD * max(RES.keys()) | |
assert max_ds >= DS, "The DS (={}) cannot be larger than {}".format(DS, max_ds) | |
# lets sample numbers | |
ds_value = DS | |
ps = [] | |
while ds_value > 0: | |
n = rng_engine.choice(list(RES.keys()), p=RES_P) | |
ds_value -= n | |
ps.append(n) | |
assert ds_value <= 0 | |
# lets just assert that the value is less than some maximum | |
assert ds_value <= 21 | |
# We must make sure that the final list is | |
# only of length N_CD | |
while len(ps) > N_CD: | |
# our strategy is to pop the last item and add its value as 1's over the number of items | |
n = ps.pop() | |
items = [1 for i in range(n)] | |
while len(items) > 0: | |
value = items.pop() | |
# we just add from the start of the list | |
# but this will lead to more 3-substitutions | |
# which might be wrong | |
for i in range(len(ps)): | |
if ps[i] < 3: | |
ps[i] += value | |
break | |
else: | |
continue | |
# either we hit the DS exactly and we do nothing | |
# or we overshot and must now reduce the counts | |
n_ps = len(ps) | |
i_ps = list(range(n_ps)) | |
dds = 0 - ds_value | |
# we must correct elements in ps | |
if dds > 0: | |
while sum(ps) != DS: | |
i = rng_engine.choice(i_ps) | |
if ps[i] > 0: | |
ps[i] -= 1 | |
assert sum(ps) == DS | |
# now we pad ps with zeros for length | |
ps_value = [0 for i in range(N_CD)] | |
ps_value[0:n_ps] = ps[:] | |
# assert that everything is in order | |
assert len(ps_value) == N_CD | |
assert sum(ps_value) == DS | |
# finally we permute the numbers sequence | |
sequence = rng_engine.permutation(ps_value) | |
# now we map the sequence of numbers to proper values | |
s = "".join([rng_engine.choice(RES[i]) for i in sequence]) | |
print(s) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment