Created
June 26, 2018 18:11
-
-
Save neksa/3422357bc9839aad38c4c8e91199f706 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
import twobitreader as tbr | |
except: | |
print("twobitreader module required") | |
nucleotides = "ACGT" # this order of nucleotides is important for reversing | |
complementary_nucleotide = dict(zip(nucleotides, reversed(nucleotides))) | |
TWOBIT_GENOMES_PATH = '/net/pan1/mutagene/data/genomes/' | |
def get_context_twobit(mutations, assembly): | |
""" | |
User twobitreader to get context of mutations | |
""" | |
contexts = {} | |
genomes_path = TWOBIT_GENOMES_PATH | |
twobit_files = { | |
38: 'hg38', | |
37: 'hg19', | |
19: 'hg19' | |
} | |
chromosome_name_mapping = { | |
"chr23": "chrX", | |
"chr24": "chrY", | |
"chr25": "chrXY", | |
"chr26": "chrM", | |
} | |
if assembly not in twobit_files: | |
return contexts | |
else: | |
twobit_file = genomes_path + "/" + twobit_files[assembly] + ".2bit" | |
f = tbr.TwoBitFile(twobit_file) | |
cn = complementary_nucleotide | |
for (chrom, pos, x, y) in mutations: | |
start = int(pos) - 1 # zero-based numbering | |
chrom = str(chrom) | |
chromosome = chrom if chrom.startswith('chr') else 'chr' + chrom | |
chromosome = chromosome_name_mapping.get(chromosome, chromosome) | |
nuc5 = 'N' | |
nuc3 = 'N' | |
nuc = 'N' | |
if chromosome in f: | |
try: | |
seq = f[chromosome][start - 1: start + 2] # +/- 1 nucleotide | |
nuc5, nuc, nuc3 = tuple(seq.upper()) | |
except Exception as e: | |
print("TwoBit exception", str(e)) | |
nuc = 'N' | |
# print(chromosome, x, nuc5, nuc, nuc3) | |
if nuc != 'N' and nuc != x: | |
if cn[nuc] == x: | |
nuc3 = cn[nuc5] | |
nuc5 = cn[nuc3] | |
else: | |
print("{}:{} {}>{} {}[{}]{}".format(chromosome, pos, x, y, nuc5, nuc, nuc3)) | |
nuc3 = nuc5 = 'N' | |
else: | |
print("NO CHROM", chromosome) | |
pass | |
contexts[(chrom, pos)] = (nuc5, nuc3) | |
return contexts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment