Skip to content

Instantly share code, notes, and snippets.

@murraycadzow
Created October 22, 2017 02:00
Show Gist options
  • Save murraycadzow/d5afce5ce46be493d17ff9f37f181f9e to your computer and use it in GitHub Desktop.
Save murraycadzow/d5afce5ce46be493d17ff9f37f181f9e to your computer and use it in GitHub Desktop.
Otago SWC 20-10-2017 R session 2
library(tidyverse)
# Load the gapminder data; text columns stay as character vectors
# rather than being converted to factors.
gapminder <- read.csv(
  file = "data/gapminder-FiveYearData.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Inspect the structure, dimensions and class of the loaded object.
str(gapminder)
dim(gapminder)
class(gapminder)

# Preview the first and last rows.
head(gapminder)
tail(gapminder)

# The same preview of the first rows, written with the pipe.
gapminder %>%
  head()
# select country column
gapminder %>%
  select(country) %>%
  head()

# select country and year
gapminder %>%
  select(country, year) %>%
  head()

# remove country
gapminder %>%
  select(-country) %>%
  head()

# filtering rows
gapminder %>%
  filter(year > 1990) %>%
  head()

# filtering rows on multiple conditions
# & AND
# | OR
gapminder %>%
  filter(year > 1990 & lifeExp > 50) %>%
  head()

# filter first, then keep only the columns of interest
gapminder %>%
  filter(year > 1990 & lifeExp > 50) %>%
  select(country, year, lifeExp) %>%
  head()

# base R version (prints every matching row, there is no head() here)
gapminder[
  gapminder$year > 1990 & gapminder$lifeExp > 50,
  c("country", "year", "lifeExp")
]

# challenge
# rows after 1990 where country is Albania (compared with ==),
# keeping just the year and country columns
albania1990above <- gapminder %>%
  filter(year > 1990 & country == "Albania") %>%
  select(year, country)
# ordering rows
# ascending by year
gapminder %>%
  arrange(year) %>%
  head()

# descending by year
gapminder %>%
  arrange(desc(year)) %>%
  head()

# descending by year, ties ordered by continent
gapminder %>%
  arrange(desc(year), continent) %>%
  head()

# creating new column
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  head()

# a comparison already yields TRUE/FALSE, so it needs no ifelse() wrapper
gapminder %>%
  mutate(year1990 = year == 1990) %>%
  head()
# groupings
# compare the structure before and after grouping by continent
gapminder %>%
  str()
gapminder %>%
  group_by(continent) %>%
  str()

# tally
# row count overall, then per continent
gapminder %>%
  tally()
gapminder %>%
  group_by(continent) %>%
  tally()

# summarise
# overall mean population
gapminder %>%
  summarise(mean_pop = mean(pop))

# mean population per continent
gapminder %>%
  group_by(continent) %>%
  summarise(mean_pop = mean(pop))

# mean and standard deviation of population per continent
gapminder %>%
  group_by(continent) %>%
  summarise(mean_pop = mean(pop), sd_pop = sd(pop))

# challenge
# mean pop, grouped by country with largest mean as first row
# (the summary column has to be named mean_pop so that arrange() can
# refer to it)
gapminder %>%
  group_by(country) %>%
  summarise(mean_pop = mean(pop)) %>%
  arrange(desc(mean_pop))

# extra:
# mean gdp by continent by year
gapminder %>%
  mutate(gdp = gdpPercap * pop) %>%
  group_by(continent, year) %>%
  summarise(mean_gdp = mean(gdp))
# plotting
# mapping only: no geom layer has been added yet
ggplot(data = gapminder, aes(x = year, y = lifeExp))
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_point()
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_line()

# lines, groupings, colour
# `group` is the ggplot2 aesthetic that draws one line per country
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, colour = continent)
) +
  geom_line()

# layering
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, colour = continent)
) +
  geom_line() +
  geom_point()

# specific aes for a layer
# colour is mapped in geom_line() only, so the point layer does not use it
ggplot(gapminder, aes(x = year, y = lifeExp, group = country)) +
  geom_line(aes(colour = continent)) +
  geom_point()

# transform axis
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

# statistics
# last_plot() is the log-scaled scatter plot created just above
last_plot() +
  geom_smooth(method = 'lm')

# store the plot in a variable so further layers can be added to it
my_plot <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
my_plot
my_plot +
  geom_smooth(method = 'lm')
# faceting
# keep only the countries whose name starts with "A" or "Z"; the helper
# column starts.with stays in the result
az.countries <- gapminder %>%
  mutate(starts.with = substr(country, start = 1, stop = 1)) %>%
  filter(starts.with %in% c("A", "Z"))

# one panel per country
ggplot(az.countries, aes(x = year, y = lifeExp, colour = continent)) +
  geom_line() +
  facet_wrap(~ country)

# themes
# same figure with readable labels and the x axis text and ticks removed;
# it is stored in my_plot so that it can be displayed and saved
my_plot <- ggplot(
  az.countries,
  aes(x = year, y = lifeExp, colour = continent)
) +
  geom_line() +
  facet_wrap(~ country) +
  labs(
    x = "Year",
    y = "Life Expectancy",
    title = "Figure 1",
    colour = "Continent"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
my_plot

# export the filtered data as comma-separated text, with a header row and
# without row names or quoting
write.table(
  x = az.countries,
  file = "data/az_countries.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# save the themed figure to disk
ggsave(filename = "figure1.png", plot = my_plot)
# Functions
# Print a fixed message. Takes no arguments; the value of the print()
# call is the function's result.
my_function <- function() {
  message_text <- "my first function"
  print(message_text)
}
# Print `word`, then print `num` multiplied by 5.
#
# word: value to print as-is.
# num:  value that is multiplied by 5 before being printed.
arg_function <- function(word, num) {
  print(word)
  times_five <- num * 5
  print(times_five)
}
# Draw a scatter plot of gdpPercap against year for the supplied data.
#
# data: data passed to ggplot(); the plot maps its `year` and `gdpPercap`
#       columns.
my_plot <- function(data) {
  scatter <- ggplot(data, aes(x = year, y = gdpPercap)) +
    geom_point()
  # plots built inside a function are not displayed automatically, so the
  # plot is drawn explicitly
  plot(scatter)
}
# Draw one my_plot() figure per distinct country in az.countries.
for (cou in unique(az.countries$country)) {
  # restrict the data to the current country before plotting
  my_plot(data = filter(az.countries, country == cou))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment