Skip to content

Instantly share code, notes, and snippets.

@hrbrmstr
Last active August 29, 2015 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hrbrmstr/0a330b212b46517ffee9 to your computer and use it in GitHub Desktop.
Save hrbrmstr/0a330b212b46517ffee9 to your computer and use it in GitHub Desktop.
Answer to http://stackoverflow.com/questions/22855197/how-do-you-get-geom-map-to-show-all-parts-of-a-map/22875922 since it's long-ish and easier to cut/paste from here than on SO
library(ggplot2)
library(maps)
library(plyr)
library(gridExtra)
ARCTP53_SOExample <- read.csv("dat.csv")
# reduce all the distinct exon/introns to just exon or intron
ARCTP53_SOExample$EorI <- factor(ifelse(grepl("exon",
ARCTP53_SOExample$ExonIntron,
ignore.case = TRUE),
"exon", "intron"))
# extract summary data for the two variables we care about for the map
arc.combined <- count(ARCTP53_SOExample, .(Country, EorI))
colnames(arc.combined) <- c("region", "EorI", "ei.ct")
# get total for country (region) and add to the summary info
arc.combined <- merge(arc.combined, count(arc.combined, .(region), wt_var=.(ei.ct)))
colnames(arc.combined) <- c("region", "EorI", "ei.ct", "region.total")
# it wasn't specified if the "EorI" is going to be used on the map so
# we won't use it below (but we could, now)
# get map and intercourse Antarctica
world_map <- map_data("world")
world_map <- subset(world_map, region!="Antarctica")
# DEAL WITH MISSING REGIONS
# get our regions and the regions named in the "world"
wm.reg <- unique(as.character(world_map$region))
arc.reg <- unique(as.character(ARCTP53_SOExample$Country))
# this shows the missing ones
sort(arc.reg[!arc.reg %in% wm.reg])
# this shows all valid ones
sort(wm.reg)
# INSERT YOUR #SPIFFY CODE TO CLEANUP THE REGIONS HERE :-)
# this will show the counts by country with all of the "chart junk" removed
# and the "counts" scaled as a gradient, and with the legend at the top
gg <- ggplot(arc.combined)
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=region),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = region, fill = region.total), size=0.25)
gg <- gg + scale_fill_gradient(low="#fff7bc", high="#cc4c02", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg
# BUT you might want to show the counts by intron/exon by country
gg <- ggplot(arc.combined[arc.combined$EorI=="exon",])
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=region),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = region, fill = ei.ct), size=0.25)
gg <- gg + scale_fill_gradient(low="#f7fcb9", high="#238443", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by 'exon' & country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg.exon <- gg
gg <- ggplot(arc.combined[arc.combined$EorI=="intron",])
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=region),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = region, fill = ei.ct),
colour = "#7f7f7f", size=0.25)
gg <- gg + scale_fill_gradient(low="#ece7f2", high="#0570b0", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by 'intron' & country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg.intron <- gg
grid.arrange(gg.exon, gg.intron, ncol=1)
library(ggplot2)
library(maps)
library(plyr)
library(gridExtra)
library(maptools)
ARCTP53_SOExample <- read.csv("dat.csv")
# reduce all the distinct exon/introns to just exon or intron
ARCTP53_SOExample$EorI <- factor(ifelse(grepl("exon",
ARCTP53_SOExample$ExonIntron,
ignore.case = TRUE),
"exon", "intron"))
# extract summary data for the two variables we care about for the map
arc.combined <- count(ARCTP53_SOExample, .(Country, EorI))
colnames(arc.combined) <- c("id", "EorI", "ei.ct")
# get total for country (region) and add to the summary info
arc.combined <- merge(arc.combined, count(arc.combined, .(id), wt_var=.(ei.ct)))
colnames(arc.combined) <- c("id", "EorI", "ei.ct", "region.total")
# it wasn't specified if the "EorI" is going to be used on the map so
# we won't use it below (but we could, now)
# get map and intercourse Antarctica
data(wrld_simpl)
world_map <- fortify(wrld_simpl, region="NAME")
world_map <- subset(world_map, id!="Antarctica")
world_map <- world[order(world_map$order), ]
# DEAL WITH MISSING REGIONS
# get our regions and the regions named in the "world"
# clean up our data first
arc.combined$id <- gsub("USA", "United States", arc.combined$id)
arc.combined$id <- gsub("UK", "United Kingdom", arc.combined$id)
arc.combined$id <- gsub("The Netherlands", "Netherlands", arc.combined$id)
arc.combined$id <- gsub("Iran", "Iran (Islamic Republic of)", arc.combined$id)
arc.combined$id <- gsub("China.*", "China", arc.combined$id)
arc.combined$id <- gsub("Chinese.*", "Taiwan", arc.combined$id)
wm.reg <- unique(as.character(world_map$id))
arc.reg <- unique(as.character(arc.combined$id))
# this shows the missing ones
sort(arc.reg[!arc.reg %in% wm.reg])
# this shows all valid ones
# INSERT YOUR #SPIFFY CODE TO CLEANUP THE REGIONS HERE :-)
# this will show the counts by country with all of the "chart junk" removed
# and the "counts" scaled as a gradient, and with the legend at the top
gg <- ggplot(arc.combined)
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=id),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = id, fill = region.total), size=0.25)
gg <- gg + scale_fill_gradient(low="#fff7bc", high="#cc4c02", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg
# BUT you might want to show the counts by intron/exon by country
gg <- ggplot(arc.combined[arc.combined$EorI=="exon",])
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=id),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = id, fill = ei.ct), size=0.25)
gg <- gg + scale_fill_gradient(low="#f7fcb9", high="#238443", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by 'exon' & country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg.exon <- gg
gg <- ggplot(arc.combined[arc.combined$EorI=="intron",])
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=id),
fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = id, fill = ei.ct),
colour = "#7f7f7f", size=0.25)
gg <- gg + scale_fill_gradient(low="#ece7f2", high="#0570b0", name="Tumor counts")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Tumor contribution by 'intron' & country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg.intron <- gg
grid.arrange(gg.exon, gg.intron, ncol=1)
@OFish
Copy link

OFish commented Apr 7, 2014

Haha. Thanks, can't really say its a "huge" find, esecially because it was simply due to luck (and I play a lot of Trivia in pubs after work, haha). But I was just composing an email to the author, as I do find it a bit "worrysome" that its plotting a map that's still using the USSR. Basically that would mean that all countries like lithuania, some of the "stans", georgia etc. don't turn up on the map. Let me know if you want me to hold off with the email...and write a blog post instead. Was just thinking that maybe the authors want to communicate that themselves. Seeing that the last "update" was in 2013...... :-/

Thanks for the links to the resources. I'll definitely have a look at them and try to learn the syntax. I must say, I've only been using R for say 6 months or so and it's pretty insane how much I learn every day. I'd love to learn how to clean up the data in R, as switching to excel is sometimes tedious and because of my dataset size I often have multiple text files open....for some reason this helps me keep an overview of what I'm working on instead of working out of one huge sheet....eventhough R-purists find that "ridiculous".

I'll send you an email with the other stuff I've been thinking about re the map. I really, really appreciate your help.

Cheers,

Oli

@hrbrmstr
Copy link
Author

hrbrmstr commented Apr 7, 2014

Well, this explains the real reason why hadley made the fortify() function for geom_map(). I added another sample source code file that uses a different set of maps that are far more current. code is mostly the same except for a change in variable name (region is now id).

I'm normally making more localized maps from shapefiles so i use that function alot. I didn't realize they weren't updating the base maps package maps anymore.

@OFish
Copy link

OFish commented Apr 7, 2014

Woha. Ok. Amazing. The OpenRefine platform is awesome, have just d/ld it to have a look at the data.frame as it might need some cleaning up anyways :-).

Regarding your code however, where did you specify wrdl_simpl? Getting an error from R there...and couldn't find it anywhere in the code.

@hrbrmstr
Copy link
Author

hrbrmstr commented Apr 7, 2014

typo (mebbe). or it cld be that you didn't library(maptools) first.. the 'new' code now has fully working code and cleans up your data frame :-) had to make sure it worked for your purposes.

@OFish
Copy link

OFish commented Apr 7, 2014

Thanks for this. So this is what I get from maptools

library(maptools)
Loading required package: sp
Checking rgeos availability: FALSE
Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
which has a restricted licence. It is disabled by default;
to enable gpclib, type gpclibPermit()
gpclibPermit()
[1] FALSE

It's weird, I can't enable it...?

EDIT: Didn't see you added the clean-up code!! Ha! Amazing, thanks!

@OFish
Copy link

OFish commented Apr 7, 2014

So the code's returning an error now by the way...can't figure out where it is....tried editing some things. Note, just changed the title names a bit.

gg <- ggplot(arc.combined)
gg <- gg + geom_map(dat=world_map, map = world_map, aes(map_id=id), fill="white", color="#7f7f7f", size=0.25)
gg <- gg + geom_map(map = world_map, aes(map_id = id, fill = region.total), size=0.25)
gg <- gg + scale_fill_gradient(low="#fff7bc", high="#cc4c02", name="Number of tumors")
gg <- gg + expand_limits(x = world_map$long, y = world_map$lat)
gg <- gg + labs(x="", y="", title="Number of tumors analyzed by country")
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg
Don't know how to automatically pick scale for object of type function. Defaulting to continuous
Error in data.frame(map_id = function (.variables, drop = FALSE) :
arguments imply differing number of rows: 0, 24377

That's weird...

@OFish
Copy link

OFish commented Apr 9, 2014

Hi there. Took some time but worked my may around the issues with maptoolsand gpclibPermit(). On line 32 I believe there's a small mistake in your code:

world_map <- world[order(world_map$order), ] ## should be
world_map <- world_map[order(world_map$order), ]

If you change all of that I get the respective plots. Thanks for that!
Sent you an msge as well, which is not at all of priority. But thanks so much for everything until now. !
Cheers,

O.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment