Created
September 11, 2018 15:45
-
-
Save isaacovercast/01a3c76ccdcd197bee0bac6524ab0081 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dadi_to_momi(infile, outfile=None, verbose=False): | |
if outfile == None: | |
outfile = infile.split(".sfs")[0] + "_momi.sfs" | |
dat = open(infile).readlines() | |
## Get rid of comment lines | |
dat = [x.strip() for x in dat if not x.startswith("#")] | |
if not len(dat) == 3: | |
raise Exception("Malformed dadi sfs {}.\n Must have 3 lines, yours has {}".format(infile, len(dat))) | |
## Parse the info line into nsamps per pop (list of ints), folding flag, and pop names list (if they are given) | |
info = dat[0].split() | |
nsamps = [] | |
## Keep carving off values as long as they cast successfully as int | |
for i in info: | |
try: | |
nsamps.append(int(i)) | |
except: | |
pass | |
nsamps = np.array(nsamps) | |
pops = [x.replace('"', '') for x in info[len(nsamps)+1:]] | |
folded = info[len(nsamps)] | |
folded = False if "un" in folded else True | |
if not len(pops) == len(nsamps): | |
print("Number of populations doesn't agree with number of pop names, using generic names.") | |
pops = ["pop"+x for x in range(len(nsamps))] | |
if verbose: print("Info nsamps={} folded={} pops={}".format(nsamps, folded, pops)) | |
## Get mask | |
mask = list(map(int, dat[2].split())) | |
if verbose: print(mask) | |
## Get sfs, and reshape based on sample sizes | |
sfs = list(map(float, dat[1].split())) | |
if verbose: print(sfs) | |
length = np.ma.array(sfs, mask=mask).sum() | |
sfs = np.array(sfs).reshape(nsamps) | |
if verbose: print("length {}".format(length)) | |
if verbose: print(sfs) | |
## Get counts per sfs bin | |
counts = Counter() | |
for sfsbin in product(*[range(y) for y in [x for x in nsamps]]): | |
## Ignore monomorphic snps | |
## nsamps minus 1 here because of the off by one diff between number | |
## of bins in the sfs and actual number of samples | |
if sfsbin == tuple(nsamps-1) or sfsbin == tuple([0] * len(nsamps)): | |
continue | |
## ignore zero bin counts | |
if sfs[sfsbin] == 0: | |
continue | |
if verbose: print(sfsbin, sfs[sfsbin]), | |
counts.update({sfsbin:sfs[sfsbin]}) | |
if verbose: print("nbins {}".format(len(counts))) | |
## Convert SFS bin style into momi config style | |
configs = pd.DataFrame(index=range(len(counts)), columns=pops) | |
locus_info = [] | |
for i, c in enumerate(counts): | |
## (n-1) here because nbins in dadi sfs is n+1 | |
cfg = np.array([[(n-1)-x, x] for x,n in zip(c, nsamps)]) | |
configs.iloc[i] = [list(map(int, list(x))) for x in cfg] | |
locus_info.append([0, i, counts[c]]) | |
if verbose: print("n_snps {}".format(np.sum([x[2] for x in locus_info]))) | |
## Finally build the momi style sfs dictionary and write it out | |
momi_sfs = {"sampled_pops":pops, | |
"folded":folded, | |
"length":int(length), | |
"configs":configs.values.tolist(), | |
"(locus,config_id,count)":locus_info} | |
with open(outfile, 'w') as out: | |
out.write("{}".format(json.dumps(momi_sfs))) | |
## make it pretty | |
sfs = momi.Sfs.load(outfile) | |
## Fold if unfolded | |
if folded: sfs = sfs.fold() | |
sfs.dump(outfile) | |
#dadi_to_momi(infile, verbose=True) |
Thank you Ludovic, glad you are finding it useful. This code is written for
python 3. 'product' comes from collections, sorry i didn't have the imports
in the gist. If you want to use this code it actually got incorporated into
momi2, so you can use it like this:
sfs = momi.data.convert.sfs_from_dadi(infile)
Good luck!
-isaac
…On Sun, Jun 23, 2019 at 2:07 AM Ludovic Dutoit ***@***.***> wrote:
Hi Isaac, I am having a look at your great little piece of code in order
to transform a dadi sfs to a momi one that I can input in momi.Sfs.load().
That is what I am looking at I believe? I am really grateful for your code
but I am not managing to make it work because I am not sure which module to
load and which version of python you are running. I believe Counter() comes
from collections but at line 42 you got prouct and I am not sure where to
find it. Could you help me? Thank you loads! Ludovic Dutoit
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<https://gist.github.com/01a3c76ccdcd197bee0bac6524ab0081?email_source=notifications&email_token=ABNSXP5QU3CIRSKCAZEL563P34HJFA5CNFSM4H2YFTH2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAFUENY#gistcomment-2951388>,
or mute the thread
<https://github.com/notifications/unsubscribe-auth/ABNSXP6UKPLNIBVTR5EZEZ3P34HJFANCNFSM4H2YFTHQ>
.
Hi Isaac, that is great! I could not track product() as it did not exist in python 2 (unlike Counter()). Thank you for Open Science and great to know it is in Momi. Ludo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Isaac, I am having a look at your great little piece of code in order to transform a dadi sfs to a momi one that I can input in momi.Sfs.load(). That is what I am looking at I believe? I am really grateful for your code but I am not managing to make it work because I am not sure which module to load and which version of python you are running. I believe Counter() comes from collections but at line 42 you got prouct and I am not sure where to find it. Could you help me? Thank you loads! Ludovic Dutoit