Skip to content

Instantly share code, notes, and snippets.

@philippbayer
Created February 6, 2018 03:53
Show Gist options
  • Save philippbayer/8bab36e1bd2c9494d85cbc9859459c2a to your computer and use it in GitHub Desktop.
Save philippbayer/8bab36e1bd2c9494d85cbc9859459c2a to your computer and use it in GitHub Desktop.
Plots normalised genome densities
library(karyoploteR)
library(RColorBrewer)
# napus lengths
napus_v81 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(31163449,31341976,39488917,23309377,28601536,31899769,28903434,21742161,46718920,19955111,47954326,58662067,71849897,61044541,52720299,44607073,52496478,46289801,60205298)))
zs11 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(35822560,35332831,49139118,23556787,31473443,36080979,27426921,27741902,45944924,22217279,50776418,68396430,80379030,70552673,44144642,45505716,62442588,46347055,51699380)))
napus_v4 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(23267857,24793738,29767491,19151661,23067599,24396387,24006522,18961942,33865341,17398228,38829318,46221805,60573395,48930238,43185228,37225953,44770478,38477088,48508221)))
tapidor <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=c(23876978,27868070,32107674,20993855,20100943,29139755,22657977,16419926,30805108,22463849,31883113,40535026,55243197,45593068,45599631,34070321,37311604,40176764,50979771)))
rapa_V21 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA07','chrA09','chrA05','chrA10','chrA03','chrA06','chrA08','chrA04'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(33885993,30435971,29764481,54546899,36115061,18561455,36455010,39861404,27726666,23467636)))
rapa_FPSc <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(30984274,32762446,25115506,21597148,28488604,29376281,27477425,23189090,44888296,19914975)))
oleracea_v21 <- toGRanges(data.frame(chr=c('chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=c(1,1,1,1,1,1,1,1,1), end=c(43764889,52886896,64984696,53719094,46902586,39822477,48366698,41758686,54679869)))
rapa_v15 <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10'), start=c(1,1,1,1,1,1,1,1,1,1), end=c(26791029,26939827,31765689,19269590,25303533,25210369,25876097,20826946,38884801,16405181)))
fake <- toGRanges(data.frame(chr=c('chrA01','chrA02','chrA03','chrA04','chrA05','chrA06','chrA07','chrA08','chrA09','chrA10','chrC01','chrC02','chrC03','chrC04','chrC05','chrC06','chrC07','chrC08','chrC09'), start=rep(1, 19), end=rep(101, 19)))
# ranges(tapidor) has start and end
# seqnames(tapidor) has chromosome names
# as.vector(seqnames(tapidor)) has them better
# end(tapidor) has all lengths
standardise_positions <- function(gff, chromosomes) {
# this function takes a gff-based Granges and a list of chromosomes (also Granges)
# and divides all gene start and end positions in the gff
# by the length of the chromosome it's on
# I'm making a new GRanges object and appending each chromosome to it
new_granges <- GRanges()
for(i in seq_along(end(chromosomes))) {
chrom <- as.vector(seqnames(chromosomes))[i]
length <- end(chromosomes)[i]
this_chrom <- gff[grepl(chrom, seqnames(gff)) ]
start(this_chrom) <- start(this_chrom) / length * 100 + 1
end(this_chrom) <- end(this_chrom) / length * 100 + 2
new_granges <- append(new_granges, this_chrom)
}
return(new_granges)
}
# LOAD THE NAPUS ONES
napus_v81_gff <- import.gff('./Brassica_napus/Brassica_Darmor_81/Darmor_v81_assembly.augustus_masked_filtered_renamedFields_sorted_Proper_renamed.gff')
# remove exons
napus_v81_gff <- napus_v81_gff[napus_v81_gff$type=='gene']
# kick out unplaced chromosomes
napus_v81_gff <- napus_v81_gff[seqnames(napus_v81_gff) != 'Unplaced_contigs_v81']
napus_v81_gff <- standardise_positions(napus_v81_gff, napus_v81)
stopifnot(length(napus_v81_gff) > 0) # assert that we have a remaining gff
napus_v81_gff
zs11_gff <- import.gff('./Brassica_napus/Brassica_napus_ZS/Brassica_napus_ZS11_GenesetV201608.gff')
zs11_gff <- zs11_gff[zs11_gff$type == 'mRNA']
# keep only chromosomes, heaps of scaffolds
zs11_gff <- zs11_gff[grepl('chr*', seqnames(zs11_gff))]
zs11_gff <- standardise_positions(zs11_gff, zs11)
stopifnot(length(zs11_gff)>0)
tapidor_gff <- import.gff('./Brassica_napus/Brassica_napus_Tapidor/Tapidor_v63_assembly.augustus_masked_filtered_fixed_chrom_sorted_Proper_renamed.gff')
tapidor_gff <- tapidor_gff[tapidor_gff$type=='gene']
tapidor_gff <- tapidor_gff[seqnames(tapidor_gff) != 'Unplaced']
tapidor_gff <- standardise_positions(tapidor_gff, tapidor)
stopifnot(length(tapidor_gff) > 0)
napus_v4_gff <- import.gff('./Brassica_napus/Brassica_napus_v5/Brassica_napus.annotation_v5.gff3')
napus_v4_gff <- napus_v4_gff[napus_v4_gff$type == 'mRNA']
# kick out all 'random' (unplaced) chromosomes
napus_v4_gff <- napus_v4_gff[!grepl('*random', seqnames(napus_v4_gff))]
napus_v4_gff <- standardise_positions(napus_v4_gff, napus_v4)
stopifnot(length(napus_v4_gff) > 0)
# NOW COME THE RAPAS
rapa_v21_gff <- import.gff('./Brassica_rapa/Rapa_v2.1/BrapaV2.1PacBio.Chr.gene.gff')
rapa_v21_gff <- rapa_v21_gff[rapa_v21_gff$type == 'gene']
rapa_v21_gff <- rapa_v21_gff[!grepl('Scaff*', seqnames(rapa_v21_gff))]
rapa_v21_gff <- standardise_positions(rapa_v21_gff, rapa_V21)
rapa_FPSc_gff <- import.gff('./Brassica_rapa/FPSc/BrapaFPsc_277_v1.3.gene_exons.gff3')
rapa_FPSc_gff <- rapa_FPSc_gff[rapa_FPSc_gff$type == 'gene']
rapa_FPSc_gff <- rapa_FPSc_gff[!grepl('Scaff*', seqnames(rapa_FPSc_gff))]
rapa_FPSc_gff <- standardise_positions(rapa_FPSc_gff, rapa_FPSc)
rapa_v15_gff <- import.gff('./Brassica_rapa/v1.5/Brapa_gene_v1.5.gff')
rapa_v15_gff <- rapa_v15_gff[rapa_v15_gff$type == 'gene']
rapa_v15_gff <- rapa_v15_gff[!grepl('Scaff*', seqnames(rapa_v15_gff))]
rapa_v15_gff <- standardise_positions(rapa_v15_gff,rapa_v15)
# NOW COMES THE OLERACEA
oleracea_v21_gff <- import.gff('./Brassica_oleracea/Oleracea_v2.1/Brassica_oleracea.v2.1.34.gff3')
oleracea_v21_gff <- oleracea_v21_gff[oleracea_v21_gff$type == 'mRNA']
oleracea_v21_gff <- oleracea_v21_gff[!grepl('Scaff*', seqnames(oleracea_v21_gff))]
oleracea_v21_gff <- standardise_positions(oleracea_v21_gff, oleracea_v21)
# Plot gene density
cols <- brewer.pal(11, 'Spectral')
png('Gene_density.png', width = 800*3, height = 480*3, pointsize=12)
kp <- plotKaryotype(genome = fake, plot.type=4, cex=1)
kpAddBaseNumbers(kp, tick.dist=100, minor.tick.dist=20, cex=0.5)
MAX_DENSITY <- 305 # I want all tracks to have the same y-axis
kp <- kpPlotDensity(kp, napus_v4_gff, r0=0, r1=0.1, col=cols[1], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. napus\nv4', pos=1, r0=0, r1=0.1, cex=1, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0, r1=0.1, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, tapidor_gff, r0=0.12, r1=0.22, col=cols[2], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. napus\nTapidor', pos=1, r0=0.12, r1=0.22, cex=1, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.12, r1=0.22, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, zs11_gff, r0=0.24, r1=0.34, col=cols[3], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. napus\nZS11', pos=1, r0=0.24, r1=0.34, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.24, r1=0.34, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, napus_v81_gff, r0=0.36, r1=0.46, col=cols[4], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. napus\nv8.1', pos=1, r0=0.36, r1=0.46, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.36, r1=0.46, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
# Plotting rapa and oleracea results in warnings (no chrC, no chrA found), we can safely ignore that
kp <- kpPlotDensity(kp, rapa_v21_gff, r0=0.48, r1=0.58, col=cols[5], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. rapa\nv2.1', pos=1, r0=0.48, r1=0.58, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.48, r1=0.58, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, rapa_FPSc_gff, r0=0.6, r1=0.7, col=cols[6], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. rapa\nFPSc', pos=1, r0=0.6, r1=0.7, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.6, r1=0.7, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, rapa_v15_gff, r0=0.72, r1=0.8, col=cols[7], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. rapa\nv1.5', pos=1, r0=0.72, r1=0.8, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.72, r1=0.8, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
kp <- kpPlotDensity(kp, oleracea_v21_gff, r0=0.82, r1=0.9, col=cols[8], ymax=MAX_DENSITY, window.size=1)
kpAddLabels(kp, labels='B. oleracea\nv2.1', pos=1, r0=0.82, r1=0.9, label.margin=0.02)
kpAxis(kp, ymin = 0, r0=0.82, r1=0.9, ymax=MAX_DENSITY, cex=0.8, side=2)
stopifnot(kp$latest.plot$computed.values$max.density < MAX_DENSITY)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment