-
-
Save timyates/89d35075b969af50a3f9 to your computer and use it in GitHub Desktop.
Take a transcript or vector of transcripts, and using xmapcore return the probes that hit the translated region of the transceript and where they hit relative to the translated sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the probe hit locations relative to the 5' end of a translated transcript | |
# | |
# Things to note: | |
# | |
# 1) This will not return probes which hit across exon junctions. | |
# 2) The 'sequence' column will always return the sequence of the | |
# probe, not the transcript sequence. This should especially be | |
# when looking at the reverse strand. | |
transcript.to.translatedprobes = function( transcript.ids ) { | |
# Get all the exons for this transcript | |
exons = transcript.to.exon( transcript.ids, as.vector=F ) | |
sapply( split( exons[['stable_id']], exons[['IN1']] ), function( exons ) { | |
exons = exon.details( exons ) | |
fwd.strand = all( exons$strand == 1 ) | |
translated.width = sum( width( exons ) ) | |
# Go along each exon | |
probes = sapply( seq_along( exons$stable_id ), function( eidx ) { | |
# Get the details for this exon | |
exon = exons[ eidx, ] | |
# Find the probes which hit the same region as the exon | |
probes = probes.in.range( as.character( space( exon ) ), | |
start( exon ), | |
end( exon ), | |
exon$strand, | |
as.vector=F ) | |
if( !is.null( probes ) ) { | |
# Remove those that hang off the ends of the exon | |
probes = probes[ start( probes ) >= start( exon ), ] | |
probes = probes[ end( probes ) <= end( exon ), ] | |
} | |
# Return the probes for this exon | |
probes | |
} ) | |
# Then, change these offsets so that they are relative to the start of the | |
# transcript | |
exon.offset = 0 | |
for( idx in seq_along( exons$stable_id ) ) { | |
# For the reverse strand, go through the list backwards | |
eidx = if( fwd.strand ) idx else length( exons$stable_id ) - idx + 1 | |
if( !is.null( probes[[ eidx ]] ) ) { | |
ranges( probes[[ eidx ]] ) = shift( ranges( probes[[ eidx ]] ), | |
-start( exons[ eidx, ] ) + exon.offset ) | |
# If on the reverse strand, change offsets to 5' from 3' | |
if( !fwd.strand ) { | |
ranges( probes[[ eidx ]] ) = IRanges( start=translated.width - end( probes[[ eidx ]] ), | |
end=translated.width - start( probes[[ eidx ]] ) ) | |
# Re-sort the probes by start | |
probes[[ eidx ]] = probes[[ eidx ]][ order( start( probes[[ eidx ]] ) ), ] | |
} | |
} | |
exon.offset = exon.offset + width( exons[ eidx, ] ) | |
} | |
# Remove all NULLs and combine | |
probes = do.call( 'rbind', probes[ !sapply( probes, is.null ) ] ) | |
} ) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> transcript.to.translatedprobes( c( 'ENST00000371953', 'ENST00000462694', 'ENST00000329958' ) ) | |
$ENST00000329958 | |
RangedData with 8 rows and 3 value columns across 1 space | |
space ranges | sequence probe_hit_count strand | |
<character> <IRanges> | <character> <numeric> <integer> | |
1 10 [ 69, 93] | CCTCCCGGGTTCAAGCAATTCTCCT 2 -1 | |
2 10 [ 70, 94] | CTCCCGGGTTCAAGCAATTCTCCTG 2 -1 | |
3 10 [ 71, 95] | TCCCGGGTTCAAGCAATTCTCCTGC 2 -1 | |
4 10 [ 75, 99] | GGGTTCAAGCAATTCTCCTGCCTCA 2 -1 | |
5 10 [ 87, 111] | TTCTCCTGCCTCAGCCTCCCGAGTA 2 -1 | |
6 10 [ 88, 112] | TCTCCTGCCTCAGCCTCCCGAGTAG 2 -1 | |
7 10 [ 90, 114] | TCCTGCCTCAGCCTCCCGAGTAGCT 2 -1 | |
8 10 [199, 223] | TTGAACTCCTGACCTCAAGTGATCC 2 -1 | |
$ENST00000371953 | |
RangedData with 110 rows and 3 value columns across 1 space | |
space ranges | sequence probe_hit_count strand | |
<character> <IRanges> | <character> <numeric> <integer> | |
1 10 [ 7, 31] | TCAGACTCGAGTCAGTGACACTGCT 1 1 | |
2 10 [ 29, 53] | GCTCAACGCACCCATCTCAGCTTTC 1 1 | |
3 10 [ 46, 70] | CAGCTTTCATCATCAGTCCTCCACC 1 1 | |
4 10 [ 90, 114] | CCCTGCCTCCGGCTGGGTTTCTGGG 1 1 | |
5 10 [444, 468] | TGATGTGGCGGGACTCTTTATGCGC 1 1 | |
6 10 [463, 487] | ATGCGCTGCGGCAGGATACGCGCTC 1 1 | |
7 10 [492, 516] | CTGGGACGCGACTGCGCTCAGTTCT 1 1 | |
8 10 [545, 569] | AAGTTTGAGAGTTGAGCCGCTGTGA 1 1 | |
9 10 [714, 738] | CTCCTCTTCGTCTTTTCTAACCGTG 1 1 | |
... ... ... ... ... ... ... | |
102 10 [5453, 5477] | TGGTTGCCACAAAGTGCCTCGTTTA 1 1 | |
103 10 [6137, 6161] | AGCGTTCTAGTCCTGAAAAAGTCCA 1 1 | |
104 10 [7445, 7469] | CCTGGAGACATAGCCATCGTTAATA 1 1 | |
105 10 [8061, 8085] | GCTCTGAAAATTACCGCGTTTAGTA 1 1 | |
106 10 [8624, 8648] | TTGCCCTCATGTCCTAATGTGCAGT 1 1 | |
107 10 [8924, 8948] | TATATCAAGTTCCATTTGGCTACTG 1 1 | |
108 10 [8939, 8963] | TTGGCTACTGATGGACAAAAAATAG 1 1 | |
109 10 [8961, 8985] | TAGAAATGCCTTCCTATGGAGAGTA 1 1 | |
110 10 [8972, 8996] | TCCTATGGAGAGTATTTTTCCTTTA 1 1 | |
$ENST00000462694 | |
RangedData with 11 rows and 3 value columns across 1 space | |
space ranges | sequence probe_hit_count strand | |
<character> <IRanges> | <character> <numeric> <integer> | |
1 10 [ 19, 43] | AGAGATCGTTAGCAGAAACAAAAGG 2 1 | |
2 10 [ 33, 57] | GAAACAAAAGGAGATATCAAGAGGA 2 1 | |
3 10 [ 46, 70] | ATATCAAGAGGATGGATTCGACTTA 2 1 | |
4 10 [ 50, 74] | CAAGAGGATGGATTCGACTTAGACT 2 1 | |
5 10 [ 56, 80] | GATGGATTCGACTTAGACTTGACCT 2 1 | |
6 10 [121, 145] | AAGACTTGAAGGCGTATACAGGAAC 2 1 | |
7 10 [137, 161] | TACAGGAACAATATTGATGATGTAG 2 1 | |
8 10 [255, 279] | AGATACTTTGTGATGTAAACTATTA 1 1 | |
9 10 [290, 314] | CTATAATCATTTTTTGGCTTACCGT 1 1 | |
10 10 [305, 329] | GGCTTACCGTACCTAATGGACTTCA 1 1 | |
11 10 [331, 355] | GGGGATACAGTTCATTTGATAAGAA 1 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment