Skip to content

Instantly share code, notes, and snippets.

@adamewing
Last active February 21, 2023 05:20
Show Gist options
  • Save adamewing/331e1780975969afcebc2996ddb8c7a2 to your computer and use it in GitHub Desktop.
Save adamewing/331e1780975969afcebc2996ddb8c7a2 to your computer and use it in GitHub Desktop.
scATAC-seq script for Bodea et al. (Author: Geoff Faulkner)
#! /python22/Lib
import string,sys,os,math,re,random,cmd, time
def initiate(input, output, coverage_coordinates, gene_coordinates, chromosomeSizes):
##define genome and chromosome sizes for random coverage calculations
chromosomeKey = {}
f = open(chromosomeSizes)
start = 0
stop = 0
for thisline in f:
data = str.split(thisline)
if (stop == 0):
stop = stop + int(data[1])
else:
start = stop
stop = stop + int(data[1])
chromosomeKey[str(start)+"_"+str(stop)] = data[0]
f.close()
genomeSize = stop
plotKey = []
for i in range(20000):
plotKey.append(0)
##define which genes to check for UMIs with reads over those intervals
geneKey = {}
f = open(gene_coordinates)
for thisline in f:
data = str.split(thisline)
chromosome = data[1]
start = int(int(data[2])/10)
stop = int(int(data[3])/10)
strand = data[4]
try:
geneKey[chromosome]
except:
geneKey[chromosome] = {}
for i in range(start,stop+1):
geneKey[chromosome][i] = "Y"
f.close()
##define which intervals to consider for coverage plot
coverageKey = {}
f = open(coverage_coordinates)
for thisline in f:
data = str.split(thisline)
chromosome = data[1]
start = int(int(data[2])/10)
stop = int(int(data[3])/10)
interval = stop - start
strand = data[4]
try:
coverageKey[chromosome]
except:
coverageKey[chromosome] = {}
if (strand == "+"):
for i in range(20000):
coverageKey[chromosome][start-10000+i] = i
else:
for i in range(20000):
coverageKey[chromosome][stop-10000+i] = 20000-i-1
f.close()
##accumulate UMI statistics
UMIkey = {}
used = {}
f = open(input)
for thisline in f:
if (thisline[0] != "@"):
data = str.split(thisline)
if (data[11][0] == "N"):
readID = data[0]
if ((int(data[8]) <= 1000) and (int(data[8]) > 0)):
try:
used[readID]
except:
used[readID] = "Y"
start = int(int(data[3])/10)
stop = int((int(data[3])+int(data[8]))/10)
UMI = str.split(readID, ".")[0]+"."+str.split(readID, ".")[1]
try:
UMIkey[UMI]
except:
UMIkey[UMI] = [0,0]
chromosome = data[2]
result = "N"
try:
result = geneKey[chromosome][start]
except:
try:
result = geneKey[chromosome][stop]
except:
pass
if (result != "N"):
UMIkey[UMI][0] = UMIkey[UMI][0] + 1
UMIkey[UMI][1] = UMIkey[UMI][1] + 1
f.close()
##calculate coverage for selected UMIs
used = {}
f = open(input)
for thisline in f:
if (thisline[0] != "@"):
data = str.split(thisline)
readID = data[0]
if ((int(data[8]) <= 1000) and (int(data[8]) > 0)):
try:
used[readID]
except:
used[readID] = "Y"
UMI = str.split(readID, ".")[0]+"."+str.split(readID, ".")[1]
if ((UMIkey[UMI][0] > 0) and (UMIkey[UMI][1] > 10000)):
chromosome = data[2]
start = int(int(data[3])/10)
stop = int((int(data[3])+int(data[8]))/10)
for i in range(start,stop):
coordinate = -1
try:
coordinate = coverageKey[chromosome][i]
except:
pass
if (coordinate != -1):
plotKey[coordinate] = plotKey[coordinate] + 1
f.close()
o = open(output, "w")
for i in range(20000):
o.write(str(i)+"\t"+str(plotKey[i])+"\n")
o.close()
initiate(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
## 1:input best match sam file
## 2:output coverage histogram file
## 3:genomic coordinate file to calculate coverage over
## 4:genomic coordinate file to include UMIs
## 5:chromosome sizes for random analysis
## usage is e.g. python analyseATACsingle.py hippocampus.single.hg38.aligned.best.sam hippocampus.single_Camk2a_L1HS_coverage.txt L1HS_TSS_coordinates.hg38.txt Camk2a.txt hg38.genome
sys.exit()
Camk2a chr5 150289093 150291093 -
Gfap chr17 44915750 44917750 -
chr1 248956422
chr2 242193529
chr3 198295559
chr4 190214555
chr5 181538259
chr6 170805979
chr7 159345973
chr8 145138636
chr9 138394717
chr10 133797422
chr11 135086622
chr12 133275309
chr13 114364328
chr14 107043718
chr15 101991189
chr16 90338345
chr17 83257441
chr18 80373285
chr19 58617616
chr20 64444167
chr21 46709983
chr22 50818468
chrX 156040895
chrY 57227415
L1HS chr1 34572104 34572105 -
L1HS chr1 67084917 67084918 -
L1HS chr1 71513700 71513701 +
L1HS chr1 80945262 80945263 -
L1HS chr1 84058405 84058406 -
L1HS chr1 85748521 85748522 +
L1HS chr1 85927066 85927067 +
L1HS chr1 86685112 86685113 -
L1HS chr1 104776274 104776275 -
L1HS chr1 104849863 104849864 -
L1HS chr1 118852351 118852352 +
L1HS chr1 121538258 121538259 -
L1HS chr1 166249083 166249084 -
L1HS chr1 180872842 180872843 -
L1HS chr1 184845616 184845617 +
L1HS chr1 187349791 187349792 -
L1HS chr1 187597671 187597672 +
L1HS chr1 196225402 196225403 -
L1HS chr1 197707716 197707717 +
L1HS chr1 218015251 218015252 -
L1HS chr1 237075264 237075265 +
L1HS chr1 247687175 247687176 +
L1HS chr10 5245356 5245357 +
L1HS chr10 6375667 6375668 -
L1HS chr10 19094617 19094618 -
L1HS chr10 33516875 33516876 -
L1HS chr10 85355505 85355506 +
L1HS chr10 98788967 98788968 -
L1HS chr10 105383377 105383378 -
L1HS chr10 105781551 105781552 -
L1HS chr10 109818453 109818454 -
L1HS chr11 24327949 24327950 +
L1HS chr11 31321681 31321682 -
L1HS chr11 36557633 36557634 -
L1HS chr11 48853723 48853724 -
L1HS chr11 78683801 78683802 -
L1HS chr11 82155865 82155866 +
L1HS chr11 85324757 85324758 +
L1HS chr11 87346057 87346058 -
L1HS chr11 90406095 90406096 -
L1HS chr11 90972298 90972299 -
L1HS chr11 93136638 93136639 +
L1HS chr11 93420986 93420987 +
L1HS chr11 95436216 95436217 +
L1HS chr11 109183523 109183524 -
L1HS chr11 125536609 125536610 +
L1HS chr12 3505228 3505229 -
L1HS chr12 38799646 38799647 +
L1HS chr12 44114230 44114231 -
L1HS chr12 51568653 51568654 -
L1HS chr12 54788576 54788577 +
L1HS chr12 55102279 55102280 -
L1HS chr12 69779441 69779442 -
L1HS chr12 73289668 73289669 -
L1HS chr12 74874871 74874872 +
L1HS chr12 126305037 126305038 -
L1HS chr13 29647703 29647704 -
L1HS chr13 31302314 31302315 +
L1HS chr13 34486290 34486291 -
L1HS chr13 48465555 48465556 +
L1HS chr13 74426326 74426327 +
L1HS chr13 76618851 76618852 -
L1HS chr13 92691589 92691590 -
L1HS chr13 97673436 97673437 -
L1HS chr13 108516494 108516495 -
L1HS chr14 18289860 18289861 -
L1HS chr14 30690832 30690833 -
L1HS chr14 44711104 44711105 -
L1HS chr14 45483166 45483167 -
L1HS chr14 48187085 48187086 +
L1HS chr14 51800631 51800632 -
L1HS chr14 61061147 61061148 -
L1HS chr14 62535851 62535852 +
L1HS chr14 63122728 63122729 -
L1HS chr14 70547296 70547297 +
L1HS chr15 51423245 51423246 -
L1HS chr15 54932100 54932101 -
L1HS chr15 82888920 82888921 -
L1HS chr15 83456835 83456836 -
L1HS chr15 87509891 87509892 +
L1HS chr16 9590521 9590522 -
L1HS chr16 16846555 16846556 -
L1HS chr16 33958611 33958612 -
L1HS chr16 35614500 35614501 -
L1HS chr16 54042096 54042097 +
L1HS chr16 65690011 65690012 +
L1HS chr16 68589502 68589503 -
L1HS chr16 83637252 83637253 +
L1HS chr17 9615984 9615985 +
L1HS chr17 66602591 66602592 -
L1HS chr17 70458955 70458956 +
L1HS chr17 70550840 70550841 -
L1HS chr18 535703 535704 +
L1HS chr18 13981887 13981888 -
L1HS chr18 37819739 37819740 +
L1HS chr18 41775013 41775014 +
L1HS chr18 47660373 47660374 +
L1HS chr18 50349986 50349987 -
L1HS chr18 62906292 62906293 +
L1HS chr18 70752580 70752581 -
L1HS chr18 72972556 72972557 -
L1HS chr18 75846854 75846855 +
L1HS chr2 4733731 4733732 +
L1HS chr2 11002136 11002137 -
L1HS chr2 16599754 16599755 -
L1HS chr2 36118537 36118538 -
L1HS chr2 71417500 71417501 -
L1HS chr2 86661269 86661270 -
L1HS chr2 102572382 102572383 -
L1HS chr2 112509842 112509843 -
L1HS chr2 126184061 126184062 -
L1HS chr2 143860956 143860957 +
L1HS chr2 148188745 148188746 +
L1HS chr2 153013798 153013799 -
L1HS chr2 160114515 160114516 +
L1HS chr2 166988454 166988455 +
L1HS chr2 169248621 169248622 +
L1HS chr2 172315270 172315271 +
L1HS chr2 175487993 175487994 -
L1HS chr2 177973072 177973073 +
L1HS chr2 193218448 193218449 -
L1HS chr2 196905586 196905587 +
L1HS chr2 232155229 232155230 -
L1HS chr20 7122196 7122197 -
L1HS chr20 11632779 11632780 +
L1HS chr20 12807043 12807044 -
L1HS chr20 23426108 23426109 +
L1HS chr20 55859565 55859566 +
L1HS chr22 28663285 28663286 +
L1HS chr22 34555709 34555710 -
L1HS chr22 48991792 48991793 -
L1HS chr3 3963076 3963077 +
L1HS chr3 4916537 4916538 +
L1HS chr3 26404044 26404045 -
L1HS chr3 46789134 46789135 -
L1HS chr3 54400322 54400323 -
L1HS chr3 77769674 77769675 -
L1HS chr3 89466856 89466857 -
L1HS chr3 90175594 90175595 -
L1HS chr3 103562565 103562566 -
L1HS chr3 108749400 108749401 +
L1HS chr3 109199872 109199873 +
L1HS chr3 116359999 116360000 +
L1HS chr3 132952033 132952034 -
L1HS chr3 136485103 136485104 -
L1HS chr3 136969736 136969737 -
L1HS chr3 158019675 158019676 +
L1HS chr3 159101393 159101394 -
L1HS chr3 163236941 163236942 +
L1HS chr4 15841549 15841550 +
L1HS chr4 19083928 19083929 -
L1HS chr4 21159390 21159391 +
L1HS chr4 23614770 23614771 +
L1HS chr4 48057983 48057984 -
L1HS chr4 52816462 52816463 +
L1HS chr4 57568347 57568348 -
L1HS chr4 59078847 59078848 +
L1HS chr4 61945958 61945959 -
L1HS chr4 62732968 62732969 -
L1HS chr4 74723583 74723584 -
L1HS chr4 78347980 78347981 +
L1HS chr4 79704552 79704553 +
L1HS chr4 79937717 79937718 +
L1HS chr4 79966907 79966908 +
L1HS chr4 87353143 87353144 -
L1HS chr4 90681754 90681755 -
L1HS chr4 91437497 91437498 +
L1HS chr4 91978213 91978214 +
L1HS chr4 93614338 93614339 -
L1HS chr4 93644338 93644339 -
L1HS chr4 98598463 98598464 -
L1HS chr4 106577069 106577070 -
L1HS chr4 136299545 136299546 -
L1HS chr4 166569979 166569980 +
L1HS chr4 169521531 169521532 -
L1HS chr4 189137085 189137086 +
L1HS chr5 13416500 13416501 +
L1HS chr5 15912551 15912552 -
L1HS chr5 34147842 34147843 +
L1HS chr5 58384177 58384178 +
L1HS chr5 79784937 79784938 -
L1HS chr5 81616090 81616091 +
L1HS chr5 86510689 86510690 +
L1HS chr5 102131358 102131359 +
L1HS chr5 104518587 104518588 +
L1HS chr5 105519274 105519275 -
L1HS chr5 109259386 109259387 +
L1HS chr5 111302238 111302239 +
L1HS chr5 119690813 119690814 -
L1HS chr5 133583288 133583289 +
L1HS chr5 146609485 146609486 +
L1HS chr5 152076868 152076869 +
L1HS chr5 152892473 152892474 -
L1HS chr5 153077007 153077008 -
L1HS chr5 156067966 156067967 -
L1HS chr5 160715638 160715639 -
L1HS chr5 166972814 166972815 -
L1HS chr5 173408828 173408829 -
L1HS chr5 177772244 177772245 +
L1HS chr6 2423799 2423800 -
L1HS chr6 19764892 19764893 +
L1HS chr6 24817703 24817704 -
L1HS chr6 51874783 51874784 +
L1HS chr6 70010349 70010350 +
L1HS chr6 72994686 72994687 -
L1HS chr6 83333951 83333952 +
L1HS chr6 84608435 84608436 +
L1HS chr6 86005072 86005073 -
L1HS chr6 93871777 93871778 -
L1HS chr6 94204053 94204054 -
L1HS chr6 112703748 112703749 +
L1HS chr6 117102130 117102131 +
L1HS chr6 121162716 121162717 +
L1HS chr6 128998385 128998386 +
L1HS chr6 133026742 133026743 -
L1HS chr6 156034137 156034138 +
L1HS chr6 156331010 156331011 -
L1HS chr7 7465092 7465093 +
L1HS chr7 25047890 25047891 -
L1HS chr7 30439241 30439242 +
L1HS chr7 49686301 49686302 -
L1HS chr7 63154859 63154860 -
L1HS chr7 66292884 66292885 -
L1HS chr7 70197331 70197332 +
L1HS chr7 93793675 93793676 -
L1HS chr7 96852681 96852682 -
L1HS chr7 97613655 97613656 +
L1HS chr7 111249545 111249546 -
L1HS chr7 111963196 111963197 +
L1HS chr7 113782152 113782153 -
L1HS chr7 141068042 141068043 -
L1HS chr7 141926709 141926710 -
L1HS chr7 147848623 147848624 -
L1HS chr8 8470858 8470859 +
L1HS chr8 27113618 27113619 +
L1HS chr8 72881587 72881588 -
L1HS chr8 75627354 75627355 -
L1HS chr8 88685708 88685709 +
L1HS chr8 91528120 91528121 -
L1HS chr8 104745873 104745874 -
L1HS chr8 128459020 128459021 -
L1HS chr8 134076772 134076773 -
L1HS chr8 135875861 135875862 +
L1HS chr8 136444101 136444102 -
L1HS chr9 83055571 83055572 -
L1HS chr9 90149606 90149607 +
L1HS chr9 94119564 94119565 -
L1HS chr9 95697585 95697586 +
L1HS chr9 110417105 110417106 +
L1HS chr9 110791097 110791098 +
L1HS chr9 112804158 112804159 -
L1HS chrX 11707247 11707248 +
L1HS chrX 11941314 11941315 -
L1HS chrX 23238518 23238519 +
L1HS chrX 26320445 26320446 -
L1HS chrX 50025504 50025505 -
L1HS chrX 54124745 54124746 -
L1HS chrX 56695883 56695884 +
L1HS chrX 64019286 64019287 -
L1HS chrX 73106145 73106146 +
L1HS chrX 76233354 76233355 +
L1HS chrX 76328815 76328816 -
L1HS chrX 81841152 81841153 +
L1HS chrX 83065634 83065635 -
L1HS chrX 83542396 83542397 +
L1HS chrX 87821410 87821411 +
L1HS chrX 96063839 96063840 -
L1HS chrX 106469285 106469286 +
L1HS chrX 119441492 119441493 -
L1HS chrX 130517377 130517378 +
L1HS chrX 142477847 142477848 +
L1HS chrX 147653737 147653738 +
L1HS chrX 155522048 155522049 -
L1HS chrY 3443552 3443553 +
L1HS chrY 4954940 4954941 -
L1HS chrY 5612195 5612196 -
L1HS chrY 9941130 9941131 +
Pvalb chr22 36816079 36818079 -
Vip chr6 152749797 152751797 +
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment