Created
March 30, 2017 05:12
-
-
Save standage/23aba47c678aaaec78812cd62885670c to your computer and use it in GitHub Desktop.
Script for replacing gene IDs with protein IDs for Laurynne's citrus psyllid RNASeq analysis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import argparse | |
import tag | |
#------------------------------------------------------------------------------ | |
# Declare the command-line interface: | |
# how the user specifies input files and adjusts settings | |
#------------------------------------------------------------------------------ | |
parser = argparse.ArgumentParser() | |
parser.add_argument('--mapping', type=argparse.FileType('w'), metavar='FILE', | |
help='write gene ID -> protein ID mapping to a file for ' | |
'later reference') | |
parser.add_argument('deg', help='diff exp gene table') | |
parser.add_argument('gff3', help='annotation file in GFF3 format') | |
args = parser.parse_args() | |
#------------------------------------------------------------------------------ | |
# The main event | |
#------------------------------------------------------------------------------ | |
# Store the mapping in a "dictionary", a data structure for storing key/value pairs | |
mapping = dict() | |
# Read the GFF3 file to store the gene ID -> protein ID mapping | |
reader = tag.GFF3Reader(infilename=args.gff) | |
genefilter = tag.select.features(reader, type='gene') | |
pmrnafilter = tag.mrna.primary(genefilter) | |
for gene in pmrnafilter: | |
geneid = gene.get_attribute('ID') | |
for child in gene: | |
if child.type == 'CDS': | |
xpid = child.get_attribute('Name') | |
mapping[geneid] = xpid | |
if args.mapping: | |
print(geneid, xpid, sep='\t', file=args.mapping) | |
break | |
header = next(args.deg) | |
print(header, end='') | |
for line in args.deg: | |
values = line.strip().split() | |
geneid = values[0] | |
xpid = mapping[geneid] | |
values[0] = xpid | |
print('\t'.join(values)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment