Skip to content

Instantly share code, notes, and snippets.

@jalapic
Last active August 29, 2015 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/5e05ab273e3388b5bf74 to your computer and use it in GitHub Desktop.
Save jalapic/5e05ab273e3388b5bf74 to your computer and use it in GitHub Desktop.
### Quick illustration of dplyr / ggplot2
library(dplyr)
library(ggplot2)
library(magrittr)
# Install the babynames package only when it is missing, so that re-running
# the script does not download and reinstall it every time.
if (!requireNamespace("babynames", quietly = TRUE)) {
  install.packages("babynames")
}
# load library
library(babynames)
## First look at the data: structure, first rows, last rows, help page
babynames %>% str()
babynames %>% head()
babynames %>% tail()
help("babynames")
# Think of some questions you'd like me to answer using this data ???
### e.g. which name was the most popular ever - and in which year?
# All rows, sorted by n, largest first
babynames %>%
  arrange(desc(n))
# All rows, sorted by prop, largest first
babynames %>%
  arrange(desc(prop))
# Rows with sex "F" only, sorted by prop, largest first
babynames %>%
  filter(sex == "F") %>%
  arrange(desc(prop))
# Same ranking with "Mary" left out, to see which other names rank high
babynames %>%
  filter(sex == "F", name != "Mary") %>%
  arrange(desc(prop))
# e.g. How has the use of James changed over time?
# Keep only the rows whose name is exactly "James"
james <- filter(babynames, name == "James")
head(james)
# line graph: n against year, one coloured line per sex
ggplot(data = james, mapping = aes(x = year, y = n)) +
  geom_line(mapping = aes(colour = sex), lwd = 1) +
  scale_colour_manual(values = c("firebrick1", "dodgerblue")) +
  theme_bw()
# Can do it all in one chain, without storing the filtered data first.
# The filtered rows are handed to ggplot() with the regular pipe %>%; the
# exposition pipe %$% is meant for exposing columns by name, not for passing
# a whole data frame on as the first argument.
babynames %>%
  filter(name == "Chris") %>%
  ggplot(aes(year, n)) +
  geom_line(aes(color = sex), lwd = 1) +
  scale_color_manual(values = c("firebrick1", "dodgerblue")) +
  theme_bw()
## let's compare three names...
# Rows with sex "F" whose name is one of the three of interest
threenames <- babynames %>%
  filter(sex == "F", name %in% c("Jennifer", "Sarah", "Mary"))
head(threenames)
# n against year, one coloured line per name
ggplot(data = threenames, mapping = aes(x = year, y = n)) +
  geom_line(mapping = aes(group = name, colour = name), lwd = 1) +
  scale_colour_manual(values = c("firebrick1", "dodgerblue", "darkorange")) +
  theme_bw()
# another way of doing the filtering above: match against a vector of names.
mynames <- c("Kim", "Khloe", "Kendall", "Kourtney", "Kylie")
# %in% already yields TRUE/FALSE for every row, so it can be used directly
# as the filter condition.
kards <- babynames %>%
  filter(sex == "F", name %in% mynames)
head(kards)
# n against year, one coloured line per name (five colours for five names)
ggplot(kards, aes(year, n)) +
  geom_line(aes(group = name, color = name), lwd = 1) +
  scale_color_manual(
    values = c("firebrick1", "dodgerblue", "darkorange", "purple", "green1")
  ) +
  theme_bw()
## e.g. 2. Names that don't get used anymore...
# Sum of n per name over the rows with year < 1945
pre <- babynames %>%
  filter(year < 1945) %>%
  group_by(name) %>%
  summarize(npre = sum(n))
# Sum of n per name over the rows with year >= 1945
post <- babynames %>%
  filter(year >= 1945) %>%
  group_by(name) %>%
  summarize(npost = sum(n))
head(pre)
head(post)
# Keep every name that occurs in either period. Notice that some rows have
# NA - the name is missing from that period (i.e. a count of zero).
ournames <- full_join(pre, post, by = "name")
ournames
# Names with a pre-1945 total but no later rows at all, most used first
ournames %>%
  filter(is.na(npost)) %>%
  arrange(desc(npre))
## let's look at distribution of each of these in one graph....
# extract top 10 vanished names as a character vector
oldnames <- ournames %>%
  filter(is.na(npost)) %>%
  arrange(desc(npre)) %>%
  head(10) %>%
  .$name
oldnames
# All babynames rows that belong to one of those ten names
oldnames1 <- babynames %>%
  filter(name %in% oldnames)
head(oldnames1)
# n against year, one coloured line per name.
# NOTE(review): lines are grouped by name only and no sex filter is applied
# here; if a name has rows for both sexes in the same year the line may
# zig-zag - confirm whether a per-sex split or a yearly sum is wanted.
ggplot(oldnames1, aes(year, n)) +
  geom_line(aes(group = name, color = name), lwd = 1) +
  theme_bw()
### names beginning with "Adelaid"
# The pattern is anchored with ^ so that only names that start with
# "Adelaid" match; an unanchored pattern would also match the text anywhere
# inside a name.
x <- babynames[grepl("^Adelaid", babynames$name), ]
unique(x$name)
# Sum of n per year over the matching names, rows with sex "F" only
adelaids <- babynames %>%
  filter(grepl("^Adelaid", name), sex == "F") %>%
  group_by(year) %>%
  summarize(total = sum(n))
adelaids
# Yearly total as a single black line
ggplot(adelaids, aes(year, total)) +
  geom_line(color = "black", lwd = 1) +
  theme_bw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment