Skip to content

Instantly share code, notes, and snippets.

@neksa
Created June 26, 2018 18:11
Show Gist options
  • Save neksa/3422357bc9839aad38c4c8e91199f706 to your computer and use it in GitHub Desktop.
Save neksa/3422357bc9839aad38c4c8e91199f706 to your computer and use it in GitHub Desktop.
try:
import twobitreader as tbr
except:
print("twobitreader module required")
nucleotides = "ACGT" # this order of nucleotides is important for reversing
complementary_nucleotide = dict(zip(nucleotides, reversed(nucleotides)))
TWOBIT_GENOMES_PATH = '/net/pan1/mutagene/data/genomes/'
def get_context_twobit(mutations, assembly):
"""
User twobitreader to get context of mutations
"""
contexts = {}
genomes_path = TWOBIT_GENOMES_PATH
twobit_files = {
38: 'hg38',
37: 'hg19',
19: 'hg19'
}
chromosome_name_mapping = {
"chr23": "chrX",
"chr24": "chrY",
"chr25": "chrXY",
"chr26": "chrM",
}
if assembly not in twobit_files:
return contexts
else:
twobit_file = genomes_path + "/" + twobit_files[assembly] + ".2bit"
f = tbr.TwoBitFile(twobit_file)
cn = complementary_nucleotide
for (chrom, pos, x, y) in mutations:
start = int(pos) - 1 # zero-based numbering
chrom = str(chrom)
chromosome = chrom if chrom.startswith('chr') else 'chr' + chrom
chromosome = chromosome_name_mapping.get(chromosome, chromosome)
nuc5 = 'N'
nuc3 = 'N'
nuc = 'N'
if chromosome in f:
try:
seq = f[chromosome][start - 1: start + 2] # +/- 1 nucleotide
nuc5, nuc, nuc3 = tuple(seq.upper())
except Exception as e:
print("TwoBit exception", str(e))
nuc = 'N'
# print(chromosome, x, nuc5, nuc, nuc3)
if nuc != 'N' and nuc != x:
if cn[nuc] == x:
nuc3 = cn[nuc5]
nuc5 = cn[nuc3]
else:
print("{}:{} {}>{} {}[{}]{}".format(chromosome, pos, x, y, nuc5, nuc, nuc3))
nuc3 = nuc5 = 'N'
else:
print("NO CHROM", chromosome)
pass
contexts[(chrom, pos)] = (nuc5, nuc3)
return contexts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment