Skip to content

Instantly share code, notes, and snippets.

@bluegenes
Last active July 24, 2018 00:03
Show Gist options
  • Save bluegenes/96712d351e129810d90123b89a645067 to your computer and use it in GitHub Desktop.
Save bluegenes/96712d351e129810d90123b89a645067 to your computer and use it in GitHub Desktop.
Take in a list of trinity gene names (e.g. from gene-level DE using salmon), a dammit namemap, and a dammit fasta file. Output fasta of matching dammit contigs.
###################################################################
"""Function: Take in a list of trinity gene names, a dammit namemap,
and a dammit fasta file. Output fasta of matching dammit contigs.
"""
###################################################################
import sys
import os
import argparse
import screed
def trin_to_dammit(trin_names,namemap,outdir,fasta, outFasta):
with open(trin_names, 'r') as f:
contigSet = set( [ x.strip() for x in f])
with open(namemap, 'r') as f:
trin_to_dammit = {y:x.split(' ')[0] for x,y in split_more(f)}
with open(outFasta, 'w') as outF:
with screed.open(fasta) as seqs:
for read in seqs:
name = read.name.split(' ')[0]
trinName = trin_to_dammit[name]
if trinName:
trinGene = trinName.rsplit('_',1)[0]
if (trinGene in contigSet) or (trinName in contigSet):
outF.write('>' + read.name + '\n' + read.sequence + '\n')
#really quite unnecessary generator to "simplify" reading namemap into dict
def split_more(file_object):
for line in file_object:
yield line.strip().rsplit(',', 1)
if __name__ == '__main__':
"""Function: Take in a list of trinity gene names, a dammit namemap,
and a dammit fasta file. Output fasta of matching dammit contigs.
"""
psr = argparse.ArgumentParser()
psr.add_argument('--fasta')
psr.add_argument('--namemap')
psr.add_argument('--trinity_names')
psr.add_argument('-o', '--outdir', default=os.getcwd())
psr.add_argument('--outFasta')
args = psr.parse_args()
trin_to_dammit(args.trinity_names,args.namemap,args.outdir,args.fasta, args.outFasta)
@bluegenes
Copy link
Author

allow trinity gene OR transcript name input

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment