Skip to content

Instantly share code, notes, and snippets.

@jebard
Created April 9, 2022 01:20
Show Gist options
  • Save jebard/d67282228d09307e67b1d17c7da204e6 to your computer and use it in GitHub Desktop.
Save jebard/d67282228d09307e67b1d17c7da204e6 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import pysam
import numpy as np
import deeptools.mapReduce
import csv
import argparse
#Tb927_10_v5.1:3648504-3654301 #Tb927.10.14930
#Tb927_10_v5.1:3648504-3654301 (+)
#Score = 1000.0
#Tb927_08_v5.1:1328633-1334535
#Tb927.8.4500 Tb927_08_v5.1:1328633-1334535 (-) Score = 1000.0
reg = "Tb927_08_v5.1:1328633-1334535"
geneChrom = "Tb927_08_v5.1"
geneStart = 1328633#3648504
geneStop = 1334535#3654301
binSize = 10
## first read in our cell to pseudotime table
barcode_file = "barcode-to-pseudotime.txt"
no_tet_bam = "../NO_TET_REP1_extended-2022/outs/possorted_genome_bam.bam"
tet_bam = "../TET_REP1_extended-2022/outs/possorted_genome_bam.bam"
#
# create a hashmap of cell to pseudotime
## create a hashmap of cell to sample
## create a hashmap of cell to matrix of size gene.length
cell_to_pseudo = {}
cell_to_sample = {}
cell_to_matrix = {}
with open(barcode_file) as f:
for line in f:
(c, p, s) = line.split(",")
cell_to_pseudo[c] = p
cell_to_sample[c] = s
cell_to_matrix[c] = [0] * int((round(((geneStop - geneStart)/binSize),0)+1))
## for every cellular barcode assigned to +TET
bam = pysam.AlignmentFile(tet_bam)
## extract out all of the reads mapping to that barcode at the genomic loci of interest
iter = bam.fetch(region=reg)
for x in iter:
while True:
try:
barcode = x.get_tag("CB") +"_2"
if(x.reference_start > geneStart and x.reference_end < geneStop):
readStart = x.reference_start - geneStart
readStop = x.reference_end - geneStart
#print(str(x.reference_name),str(x.reference_start),str(readStart),str(readStop),barcode)
for i in range(readStart,readStop):
## create a matrix row for the read depths at each position across that loci
cell_to_matrix[barcode][int(round(i/binSize,0))] = cell_to_matrix[barcode][int(round(i/binSize,0))] + 1
break
except KeyError:
break
#print("Skipping read with no CB")
## for every cellular barcode assigned to +TET
bam = pysam.AlignmentFile(no_tet_bam)
## extract out all of the reads mapping to that barcode at the genomic loci of interest
#iter = bam.fetch(geneChrom,geneStart,geneStop)
iter = bam.fetch(region=reg)
for x in iter:
while True:
try:
barcode = x.get_tag("CB") +"_1"
if(x.reference_start > geneStart and x.reference_end < geneStop):
readStart = x.reference_start - geneStart
readStop = x.reference_end - geneStart
#print(str(x.reference_name),str(x.reference_start),str(readStart),str(readStop),barcode)
for i in range(readStart,readStop):
## create a matrix row for the read depths at each position across that loci
cell_to_matrix[barcode][int(round(i/binSize,0))] = cell_to_matrix[barcode][int(round(i/binSize,0))] + 1
break
except KeyError:
break
#print("Skipping read with no CB")
## generate a heatmap from the matrix!
## note some foomagic once you export to get it in R (remove brackets)
with open("Tb927.8.4500.matrix.txt", 'w') as f:
for key, value in cell_to_matrix.items():
f.write('%s,%s\n' % (key, value))
#print(cell_to_matrix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment