Created
February 6, 2018 03:53
-
-
Save philippbayer/8bab36e1bd2c9494d85cbc9859459c2a to your computer and use it in GitHub Desktop.
Plots normalised genome densities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(karyoploteR) | |
library(RColorBrewer) | |
# napus lengths | |
napus_v81 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(31163449,31341976,39488917,23309377,28601536,31899769,28903434,21742161,46718920,19955111,47954326,58662067,71849897,61044541,52720299,44607073,52496478,46289801,60205298))) | |
zs11 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(35822560,35332831,49139118,23556787,31473443,36080979,27426921,27741902,45944924,22217279,50776418,68396430,80379030,70552673,44144642,45505716,62442588,46347055,51699380))) | |
napus_v4 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(23267857,24793738,29767491,19151661,23067599,24396387,24006522,18961942,33865341,17398228,38829318,46221805,60573395,48930238,43185228,37225953,44770478,38477088,48508221))) | |
tapidor <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(23876978,27868070,32107674,20993855,20100943,29139755,22657977,16419926,30805108,22463849,31883113,40535026,55243197,45593068,45599631,34070321,37311604,40176764,50979771))) | |
rapa_V21 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA07','chrA09','chrA05','chrA10','chrA03','chrA06','chrA08','chrA04'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(33885993,30435971,29764481,54546899,36115061,18561455,36455010,39861404,27726666,23467636))) | |
rapa_FPSc <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(30984274,32762446,25115506,21597148,28488604,29376281,27477425,23189090,44888296,19914975))) | |
oleracea_v21 <- toGRanges(data.frame(chr=c('chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=c(1,1,1,1,1,1,1,1,1), end=c(43764889,52886896,64984696,53719094,46902586,39822477,48366698,41758686,54679869))) | |
rapa_v15 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(26791029,26939827,31765689,19269590,25303533,25210369,25876097,20826946,38884801,16405181))) | |
fake <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=rep(101, 19))) | |
# ranges(tapidor) has start and end | |
# seqnames(tapidor) has chromosome names | |
# as.vector(seqnames(tapidor)) has them better | |
# end(tapidor) has all lengths | |
standardise_positions <- function(gff, chromosomes) { | |
# this function takes a gff-based Granges and a list of chromosomes (also Granges) | |
# and divides all gene start and end positions in the gff | |
# by the length of the chromosome it's on | |
# I'm making a new GRanges object and appending each chromosome to it | |
new_granges <- GRanges() | |
for(i in seq_along(end(chromosomes))) { | |
chrom <- as.vector(seqnames(chromosomes))[i] | |
length <- end(chromosomes)[i] | |
this_chrom <- gff[grepl(chrom, seqnames(gff)) ] | |
start(this_chrom) <- start(this_chrom) / length * 100 + 1 | |
end(this_chrom) <- end(this_chrom) / length * 100 + 2 | |
new_granges <- append(new_granges, this_chrom) | |
} | |
return(new_granges) | |
} | |
# LOAD THE NAPUS ONES | |
napus_v81_gff <- import.gff('./Brassica_napus/Brassica_Darmor_81/Darmor_v81_assembly.augustus_masked_filtered_renamedFields_sorted_Proper_renamed.gff') | |
# remove exons | |
napus_v81_gff <- napus_v81_gff[napus_v81_gff$type=='gene'] | |
# kick out unplaced chromosomes | |
napus_v81_gff <- napus_v81_gff[seqnames(napus_v81_gff) != 'Unplaced_contigs_v81'] | |
napus_v81_gff <- standardise_positions(napus_v81_gff, napus_v81) | |
stopifnot(length(napus_v81_gff) > 0) # assert that we have a remaining gff | |
napus_v81_gff | |
zs11_gff <- import.gff('./Brassica_napus/Brassica_napus_ZS/Brassica_napus_ZS11_GenesetV201608.gff') | |
zs11_gff <- zs11_gff[zs11_gff$type == 'mRNA'] | |
# keep only chromosomes, heaps of scaffolds | |
zs11_gff <- zs11_gff[grepl('chr*', seqnames(zs11_gff))] | |
zs11_gff <- standardise_positions(zs11_gff, zs11) | |
stopifnot(length(zs11_gff)>0) | |
tapidor_gff <- import.gff('./Brassica_napus/Brassica_napus_Tapidor/Tapidor_v63_assembly.augustus_masked_filtered_fixed_chrom_sorted_Proper_renamed.gff') | |
tapidor_gff <- tapidor_gff[tapidor_gff$type=='gene'] | |
tapidor_gff <- tapidor_gff[seqnames(tapidor_gff) != 'Unplaced'] | |
tapidor_gff <- standardise_positions(tapidor_gff, tapidor) | |
stopifnot(length(tapidor_gff) > 0) | |
napus_v4_gff <- import.gff('./Brassica_napus/Brassica_napus_v5/Brassica_napus.annotation_v5.gff3') | |
napus_v4_gff <- napus_v4_gff[napus_v4_gff$type == 'mRNA'] | |
# kick out all 'random' (unplaced) chromosomes | |
napus_v4_gff <- napus_v4_gff[!grepl('*random', seqnames(napus_v4_gff))] | |
napus_v4_gff <- standardise_positions(napus_v4_gff, napus_v4) | |
stopifnot(length(napus_v4_gff) > 0) | |
# NOW COME THE RAPAS | |
rapa_v21_gff <- import.gff('./Brassica_rapa/Rapa_v2.1/BrapaV2.1PacBio.Chr.gene.gff') | |
rapa_v21_gff <- rapa_v21_gff[rapa_v21_gff$type == 'gene'] | |
rapa_v21_gff <- rapa_v21_gff[!grepl('Scaff*', seqnames(rapa_v21_gff))] | |
rapa_v21_gff <- standardise_positions(rapa_v21_gff, rapa_V21) | |
rapa_FPSc_gff <- import.gff('./Brassica_rapa/FPSc/BrapaFPsc_277_v1.3.gene_exons.gff3') | |
rapa_FPSc_gff <- rapa_FPSc_gff[rapa_FPSc_gff$type == 'gene'] | |
rapa_FPSc_gff <- rapa_FPSc_gff[!grepl('Scaff*', seqnames(rapa_FPSc_gff))] | |
rapa_FPSc_gff <- standardise_positions(rapa_FPSc_gff, rapa_FPSc) | |
rapa_v15_gff <- import.gff('./Brassica_rapa/v1.5/Brapa_gene_v1.5.gff') | |
rapa_v15_gff <- rapa_v15_gff[rapa_v15_gff$type == 'gene'] | |
rapa_v15_gff <- rapa_v15_gff[!grepl('Scaff*', seqnames(rapa_v15_gff))] | |
rapa_v15_gff <- standardise_positions(rapa_v15_gff,rapa_v15) | |
# NOW COMES THE OLERACEA | |
oleracea_v21_gff <- import.gff('./Brassica_oleracea/Oleracea_v2.1/Brassica_oleracea.v2.1.34.gff3') | |
oleracea_v21_gff <- oleracea_v21_gff[oleracea_v21_gff$type == 'mRNA'] | |
oleracea_v21_gff <- oleracea_v21_gff[!grepl('Scaff*', seqnames(oleracea_v21_gff))] | |
oleracea_v21_gff <- standardise_positions(oleracea_v21_gff, oleracea_v21) | |
# Plot gene density | |
cols <- brewer.pal(11, 'Spectral') | |
png('Gene_density.png', width = 800*3, height = 480*3, pointsize=12) | |
kp <- plotKaryotype(genome = fake, plot.type=4, cex=1) | |
kpAddBaseNumbers(kp, tick.dist=100, minor.tick.dist=20, cex=0.5) | |
MAX_DENSITY <- 305 # I want all tracks to have the same y-axis | |
kp <- kpPlotDensity(kp, napus_v4_gff, r0=0, r1=0.1, col=cols[1], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. napus\nv4', pos=1, r0=0, r1=0.1, cex=1, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0, r1=0.1, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, tapidor_gff, r0=0.12, r1=0.22, col=cols[2], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. napus\nTapidor', pos=1, r0=0.12, r1=0.22, cex=1, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.12, r1=0.22, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, zs11_gff, r0=0.24, r1=0.34, col=cols[3], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. napus\nZS11', pos=1, r0=0.24, r1=0.34, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.24, r1=0.34, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, napus_v81_gff, r0=0.36, r1=0.46, col=cols[4], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. napus\nv8.1', pos=1, r0=0.36, r1=0.46, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.36, r1=0.46, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
# Plotting rapa and oleracea results in warnings (no chrC, no chrA found), we can safely ignore that | |
kp <- kpPlotDensity(kp, rapa_v21_gff, r0=0.48, r1=0.58, col=cols[5], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. rapa\nv2.1', pos=1, r0=0.48, r1=0.58, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.48, r1=0.58, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, rapa_FPSc_gff, r0=0.6, r1=0.7, col=cols[6], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. rapa\nFPSc', pos=1, r0=0.6, r1=0.7, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.6, r1=0.7, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, rapa_v15_gff, r0=0.72, r1=0.8, col=cols[7], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. rapa\nv1.5', pos=1, r0=0.72, r1=0.8, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.72, r1=0.8, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
kp <- kpPlotDensity(kp, oleracea_v21_gff, r0=0.82, r1=0.9, col=cols[8], ymax=MAX_DENSITY, window.size=1) | |
kpAddLabels(kp, labels='B. oleracea\nv2.1', pos=1, r0=0.82, r1=0.9, label.margin=0.02) | |
kpAxis(kp, ymin = 0, r0=0.82, r1=0.9, ymax=MAX_DENSITY, cex=0.8, side=2) | |
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment