Last active
July 29, 2023 08:42
-
-
Save sp00nman/e9adb3c7e207c0de03d7 to your computer and use it in GitHub Desktop.
Calculate intron length from gtf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/awk -f
#
# Derive intron intervals from a GTF-like exon table.
#
# Input fields used (default whitespace field splitting):
#   $1  chromosome        $4  exon start       $5  exon end
#   $7  strand            $9  gene ID          $10 gene symbol
#   $11 transcript ID
#
# Output (tab-separated), one line for every pair of consecutive input
# records that carry the same transcript ID:
#   chromosome, intron start, intron end, "intron", strand,
#   gene ID, gene symbol, transcript ID
# where intron start = end of the first exon + 1
#   and intron end   = start of the following exon - 1.
#
# NOTE(review): this assumes the exons of a transcript are adjacent in the
# input and listed by ascending start, on both strands -- confirm against
# the producer of the input file.

BEGIN { OFS = "\t" }

# Buffer every record; introns can only be derived once the following
# record is known, so the work happens in END.
{
    chr[NR]    = $1
    start[NR]  = $4
    end[NR]    = $5
    strand[NR] = $7
    ens[NR]    = $9
    symb[NR]   = $10
    ID[NR]     = $11
}

END {
    # Stop at NR-1: the last record has no successor. Reading ID[NR+1]
    # would yield an uninitialized (empty) value that compares equal to an
    # empty $11 and emits a bogus row.
    for (i = 1; i < NR; i++) {
        # Records without a transcript ID (blank or short lines) never
        # form an intron.
        if (ID[i] == "" || ID[i] != ID[i + 1])
            continue

        # The emitted interval is the same for "+" and "-" strands: the
        # gap between this exon's end and the next exon's start.
        print chr[i],
              end[i] + 1,
              start[i + 1] - 1,
              "intron",
              strand[i],
              ens[i],
              symb[i],
              ID[i]
    }
}
# Example input
# ==> mod_ucsc_mm10_ensembl_genes_exons.gtf <==
# chr1 mm10_ensGene exon 134199223 134203590 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000086465
# chr1 mm10_ensGene exon 134234015 134234412 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000086465
# chr1 mm10_ensGene exon 134235228 134235431 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000086465
# chr1 mm10_ensGene exon 134199223 134203590 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000038191
# chr1 mm10_ensGene exon 134234015 134235427 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000038191
# chr1 mm10_ensGene exon 134199223 134201713 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000169927
# chr1 mm10_ensGene exon 134202065 134203590 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000169927
# chr1 mm10_ensGene exon 134234015 134234578 0.000000 - . ENSMUSG00000042429 Adora1 ENSMUST00000169927
# chr1 mm10_ensGene exon 8361475 8363633 0.000000 - . ENSMUSG00000025909 Sntg1 ENSMUST00000140295
# chr1 mm10_ensGene exon 8414203 8414313 0.000000 - . ENSMUSG00000025909 Sntg1 ENSMUST00000140295
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment