Created
October 7, 2014 13:08
-
-
Save dggoldst/99d760e2d26a9e171349 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script setup: working directory and package dependencies.
# NOTE(review): the hard-coded setwd() ties this script to one machine; it is
# kept because every read.csv()/ggsave() call below uses a path relative to it.
setwd("C:/Dropbox/Projects/20141002_SJDM_Member_Dir/")

library("dplyr")
library("maps")
library("ggplot2")
library("mapproj")
# Minimal ggplot2 theme for maps: starts from theme_grey() and blanks the axis
# titles, axis text, panel background and grid, removes tick length, panel
# spacing and plot margins, and puts the legend at the bottom.
#
# base_size: base font size passed through to theme_grey().
# Returns a complete ggplot2 theme object.
theme_clean <- function(base_size = 12) {
  # grid::unit() is called with its namespace instead of require(grid), so the
  # function neither attaches a package as a side effect nor fails silently.
  theme_grey(base_size) %+replace%
    theme(
      axis.title = element_blank(),
      axis.text = element_blank(),
      panel.background = element_blank(),
      panel.grid = element_blank(),
      axis.ticks.length = grid::unit(0, "cm"),
      # `axis.ticks.margin` is deprecated in ggplot2 (its replacement is the
      # margin of axis.text, which is blank here), so it is no longer set.
      # `panel.spacing` is the current name of the deprecated `panel.margin`;
      # it requires ggplot2 >= 2.2.0.
      panel.spacing = grid::unit(0, "lines"),
      plot.margin = grid::unit(c(0, 0, 0, 0), "lines"),
      legend.position = "bottom",
      complete = TRUE
    )
}
# Capitalise the first letter of every space-separated word in a single string
# and lower-case the remaining letters, e.g. "united kingdom" -> "United Kingdom".
#
# x: a character string; only the first element is used.
# Returns a length-one character vector. A missing value is returned as
# NA_character_ rather than being pasted into the literal text "NANA".
simpleCap <- function(x) {
  if (is.na(x[1])) {
    return(NA_character_)
  }
  words <- strsplit(x, " ", fixed = TRUE)[[1]]
  paste0(
    toupper(substring(words, 1, 1)),
    tolower(substring(words, 2)),
    collapse = " "
  )
}

# Element-wise version of simpleCap() for use on whole columns. The result is
# named by its input strings (Vectorize's default USE.NAMES behaviour).
vsimpleCap <- Vectorize(simpleCap)
# State boundary polygons taken from the "state" map database.
state_map <- map_data(map = "state")

# Member directory.
df <- read.csv("directory.csv.gz", header = TRUE, stringsAsFactors = FALSE)

# State lookup table; its three columns are renamed positionally so that the
# `state` column can serve as the join key below.
st <- read.csv("state_table.csv.gz", header = TRUE, stringsAsFactors = FALSE)
names(st) <- c("name", "state", "USregion")

# Attach the state name and US region to each directory row.
df <- left_join(df, st, by = "state")

# Lookup table joined onto the per-country counts further down.
rg <- read.csv("regions.csv.gz", header = TRUE, stringsAsFactors = FALSE)
# Cleanup
# Normalise the free-text country column: title-case every entry, then map
# known variants onto one canonical label. A blank country is treated as USA.
# NOTE(review): "Colombia" is rewritten to "Columbia". That looks like a
# misspelling, but it is kept as-is because regions.csv may be keyed on that
# spelling -- confirm before changing.
country_aliases <- c(
  "", "United Kingdom", "Uk", "P.r. China", "Colombia", "Republic Of Armenia"
)
country_canonical <- c(
  "USA", "UK", "UK", "China", "Columbia", "Armenia"
)
df <- mutate(
  df,
  country = vsimpleCap(country),
  # Entries that are not a known alias (including NA) pass through unchanged.
  country = ifelse(
    country %in% country_aliases,
    country_canonical[match(country, country_aliases)],
    as.character(country)
  )
)
# Member count per country, largest first.
countries <- df %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Attach the columns of `rg`. No `by` is given, so the join uses every column
# name the two tables share (presumably just `country` -- TODO confirm).
countries <- left_join(countries, rg)

# Order the region factor by total membership so bars plot largest first.
region_totals <- countries %>%
  group_by(region) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
countries$region <- factor(countries$region, levels = region_totals$region)

# Two-level flag separating the USA from every other country.
countries$usaqual <- factor(
  ifelse(countries$country == "USA", "USA", "Rest of World"),
  levels = c("USA", "Rest of World")
)

# Total members inside and outside the USA.
usaness <- countries %>%
  group_by(usaqual) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
# Bar chart: members in the USA versus the rest of the world.
p <- ggplot(usaness, aes(x = usaqual, y = count, fill = usaqual)) +
  geom_bar(stat = "identity") +
  labs(x = "", y = "", title = "Members in USA and Rest of World") +
  theme(legend.position = "none")
# Explicit print() so the plot is also drawn when the script is run via source().
print(p)
ggsave(filename = "USAvRest.png", plot = p, width = 4, height = 4)
# Bar chart: members per region, with region labels rotated to fit.
p <- ggplot(countries, aes(x = region, y = count, fill = region)) +
  geom_bar(stat = "identity") +
  labs(x = "", y = "", title = "Members by Region") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    legend.position = "none"
  )
# Explicit print() so the plot is also drawn when the script is run via source().
print(p)
ggsave(filename = "MembersByRegion.png", plot = p, width = 4, height = 4)
# Dot plot: non-USA countries with more than five members, ordered by count.
p <- ggplot(
  filter(countries, country != "USA" & count > 5),
  aes(x = count, y = reorder(country, count))
) +
  geom_point(size = 3) +
  labs(
    x = "Members", y = "",
    title = "Members Outside USA\nin Countries with >5 Members"
  ) +
  theme_bw()
# Explicit print() so the plot is also drawn when the script is run via source().
print(p)
ggsave(filename = "MembersByCountry.png", plot = p, width = 4, height = 6)
# Make state map
# Member count per state name, for rows that matched a US region.
states <- df %>%
  filter(!is.na(USregion)) %>%
  group_by(name) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Map data uses lower-case region names and "district of columbia" for DC.
states$name <- tolower(states$name)
states$name[states$name %in% "washington dc"] <- "district of columbia"

# Left-join onto every region present in the map so that states without any
# members are kept and can be given a count of zero.
allst <- data.frame(
  name = as.character(unique(state_map$region)),
  stringsAsFactors = FALSE
)
states <- merge(allst, states, by = "name", all.x = TRUE) %>%
  arrange(desc(count))
states$count[is.na(states$count)] <- 0
names(states) <- c("state", "count")
# Choropleth of member counts by state. The diverging fill scale is centred on
# the median state count.
p <- ggplot(states, aes(map_id = state, fill = count)) +
  geom_map(map = state_map, color = "black") +
  scale_fill_gradient2(
    low = "#559999", mid = "grey90", high = "#BB650B",
    midpoint = median(states$count)
  ) +
  # geom_map() does not set axis limits itself, so expand them to the map extent.
  expand_limits(x = state_map$long, y = state_map$lat) +
  coord_map("polyconic") +
  theme_clean()
# Explicit print() so the plot is also drawn when the script is run via source().
print(p)
ggsave(filename = "us.member.map.png", plot = p, width = 6, height = 4)
# Title-cased state names for axis labels.
states$state_simplecap <- vsimpleCap(as.character(states$state))

# Members in states with > 5 members
p <- ggplot(
  filter(states, count > 5),
  aes(x = count, y = reorder(state_simplecap, count))
) +
  geom_point(size = 3) +
  labs(x = "Members", y = "", title = "Members in States \nWith >5 Members") +
  theme_bw()
# Explicit print() so the plot is also drawn when the script is run via source().
print(p)
ggsave(filename = "MembersByState.png", plot = p, width = 4, height = 8)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment