Created
April 9, 2022 01:20
-
-
Save jebard/d67282228d09307e67b1d17c7da204e6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import pysam | |
import numpy as np | |
import deeptools.mapReduce | |
import csv | |
import argparse | |
#Tb927_10_v5.1:3648504-3654301 #Tb927.10.14930 | |
#Tb927_10_v5.1:3648504-3654301 (+) | |
#Score = 1000.0 | |
#Tb927_08_v5.1:1328633-1334535 | |
#Tb927.8.4500 Tb927_08_v5.1:1328633-1334535 (-) Score = 1000.0 | |
reg = "Tb927_08_v5.1:1328633-1334535" | |
geneChrom = "Tb927_08_v5.1" | |
geneStart = 1328633#3648504 | |
geneStop = 1334535#3654301 | |
binSize = 10 | |
## first read in our cell to pseudotime table | |
barcode_file = "barcode-to-pseudotime.txt" | |
no_tet_bam = "../NO_TET_REP1_extended-2022/outs/possorted_genome_bam.bam" | |
tet_bam = "../TET_REP1_extended-2022/outs/possorted_genome_bam.bam" | |
# | |
# create a hashmap of cell to pseudotime | |
## create a hashmap of cell to sample | |
## create a hashmap of cell to matrix of size gene.length | |
cell_to_pseudo = {} | |
cell_to_sample = {} | |
cell_to_matrix = {} | |
with open(barcode_file) as f: | |
for line in f: | |
(c, p, s) = line.split(",") | |
cell_to_pseudo[c] = p | |
cell_to_sample[c] = s | |
cell_to_matrix[c] = [0] * int((round(((geneStop - geneStart)/binSize),0)+1)) | |
## for every cellular barcode assigned to +TET | |
bam = pysam.AlignmentFile(tet_bam) | |
## extract out all of the reads mapping to that barcode at the genomic loci of interest | |
iter = bam.fetch(region=reg) | |
for x in iter: | |
while True: | |
try: | |
barcode = x.get_tag("CB") +"_2" | |
if(x.reference_start > geneStart and x.reference_end < geneStop): | |
readStart = x.reference_start - geneStart | |
readStop = x.reference_end - geneStart | |
#print(str(x.reference_name),str(x.reference_start),str(readStart),str(readStop),barcode) | |
for i in range(readStart,readStop): | |
## create a matrix row for the read depths at each position across that loci | |
cell_to_matrix[barcode][int(round(i/binSize,0))] = cell_to_matrix[barcode][int(round(i/binSize,0))] + 1 | |
break | |
except KeyError: | |
break | |
#print("Skipping read with no CB") | |
## for every cellular barcode assigned to +TET | |
bam = pysam.AlignmentFile(no_tet_bam) | |
## extract out all of the reads mapping to that barcode at the genomic loci of interest | |
#iter = bam.fetch(geneChrom,geneStart,geneStop) | |
iter = bam.fetch(region=reg) | |
for x in iter: | |
while True: | |
try: | |
barcode = x.get_tag("CB") +"_1" | |
if(x.reference_start > geneStart and x.reference_end < geneStop): | |
readStart = x.reference_start - geneStart | |
readStop = x.reference_end - geneStart | |
#print(str(x.reference_name),str(x.reference_start),str(readStart),str(readStop),barcode) | |
for i in range(readStart,readStop): | |
## create a matrix row for the read depths at each position across that loci | |
cell_to_matrix[barcode][int(round(i/binSize,0))] = cell_to_matrix[barcode][int(round(i/binSize,0))] + 1 | |
break | |
except KeyError: | |
break | |
#print("Skipping read with no CB") | |
## generate a heatmap from the matrix! | |
## note some foomagic once you export to get it in R (remove brackets) | |
with open("Tb927.8.4500.matrix.txt", 'w') as f: | |
for key, value in cell_to_matrix.items(): | |
f.write('%s,%s\n' % (key, value)) | |
#print(cell_to_matrix) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment