# Otago SWC 20-10-2017 R session 2
# Source: GitHub gist murraycadzow/d5afce5ce46be493d17ff9f37f181f9e (created October 22, 2017 02:00).
# Note: GitHub flagged this file as containing bidirectional Unicode text; open it in an
# editor that reveals hidden Unicode characters to review.
# Load the tidyverse (dplyr verbs, the %>% pipe and ggplot2 are used below).
library(tidyverse)

# Read the gapminder data; keep character columns as character, not factor.
gapminder <- read.csv(
  "data/gapminder-FiveYearData.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Inspect the data frame: structure, dimensions, class, first and last rows.
str(gapminder)
dim(gapminder)
class(gapminder)
head(gapminder)
tail(gapminder)

# Same as head(gapminder), written with the pipe.
gapminder %>% head()
# select country column
gapminder %>%
  select(country) %>%
  head()

# select country and year
gapminder %>%
  select(country, year) %>%
  head()

# remove country
gapminder %>%
  select(-country) %>%
  head()

# filtering rows
gapminder %>%
  filter(year > 1990) %>%
  head()

# filtering rows on multiple conditions
# & AND
# | OR
gapminder %>%
  filter(year > 1990 & lifeExp > 50) %>%
  head()

gapminder %>%
  filter(year > 1990 & lifeExp > 50) %>%
  select(country, year, lifeExp) %>%
  head()

# base R version
gapminder[
  gapminder$year > 1990 & gapminder$lifeExp > 50,
  c("country", "year", "lifeExp")
]

# challenge
# rows with year after 1990 (>) where country is Albania, keeping just the
# year and country columns
# (the variable name is kept as originally spelled; "ablania" is a typo for
# "albania")
ablania1990above <- gapminder %>%
  filter(year > 1990 & country == "Albania") %>%
  select(year, country)
# ordering rows: ascending year, descending year, then descending year with
# ties broken by continent
gapminder %>%
  arrange(year) %>%
  head()

gapminder %>%
  arrange(desc(year)) %>%
  head()

gapminder %>%
  arrange(desc(year), continent) %>%
  head()

# creating new column: total gdp from population and per-capita gdp
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  head()

# logical flag column; the comparison already yields TRUE/FALSE, so no
# ifelse() wrapper is needed
# NOTE(review): if the data only has five-yearly observations there may be no
# 1990 rows, making this column all FALSE -- confirm against the data
gapminder %>%
  mutate(year1990 = year == 1990) %>%
  head()
# groupings: compare the structure before and after group_by()
gapminder %>% str()

gapminder %>%
  group_by(continent) %>%
  str()

# tally: row count overall, then per continent
gapminder %>% tally()

gapminder %>%
  group_by(continent) %>%
  tally()

# summarise: overall mean, then per-continent mean and standard deviation
gapminder %>% summarise(mean_pop = mean(pop))

gapminder %>%
  group_by(continent) %>%
  summarise(mean_pop = mean(pop))

gapminder %>%
  group_by(continent) %>%
  summarise(mean_pop = mean(pop), sd_pop = sd(pop))

# challenge
# mean pop, grouped by country with largest mean as first row
# (the summary column must be named mean_pop so arrange() can refer to it)
gapminder %>%
  group_by(country) %>%
  summarise(mean_pop = mean(pop)) %>%
  arrange(desc(mean_pop))

# extra:
# mean gdp by continent by year
gapminder %>%
  mutate(gdp = gdpPercap * pop) %>%
  group_by(continent, year) %>%
  summarise(mean_gdp = mean(gdp))
# plotting
# data and aesthetics only: no geom layer yet
ggplot(data = gapminder, aes(x = year, y = lifeExp))

ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_point()

ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_line()

# lines, groupings, colour
# `group` is the documented ggplot2 aesthetic for drawing one line per country
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, colour = continent)
) +
  geom_line()

# layering
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, colour = continent)
) +
  geom_line() +
  geom_point()

# specific aes for a layer: only the lines are coloured by continent
ggplot(gapminder, aes(x = year, y = lifeExp, group = country)) +
  geom_line(aes(colour = continent)) +
  geom_point()

# transform axis
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

# statistics
# add a linear-model fit to the most recently drawn plot
last_plot() + geom_smooth(method = "lm")

# store the plot in a variable, draw it, then draw it again with the fit added
my_plot <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
my_plot
my_plot + geom_smooth(method = "lm")
# faceting
# keep only the countries whose name starts with "A" or "Z"
az.countries <- gapminder %>%
  mutate(starts.with = substr(country, start = 1, stop = 1)) %>%
  filter(starts.with %in% c("A", "Z"))

# one panel per country
ggplot(az.countries, aes(x = year, y = lifeExp, colour = continent)) +
  geom_line() +
  facet_wrap(~ country)

# themes
# same faceted plot with axis/legend labels and a title, and with the x-axis
# text and ticks removed; built once, then printed and saved below
my_plot <- ggplot(
  az.countries,
  aes(x = year, y = lifeExp, colour = continent)
) +
  geom_line() +
  facet_wrap(~ country) +
  labs(
    x = "Year",
    y = "Life Expectancy",
    title = "Figure 1",
    colour = "Continent"
  ) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
my_plot

# write the filtered data as comma-separated text
# NOTE: with quote = FALSE the output is only valid CSV while no field
# contains a comma
write.table(
  x = az.countries,
  file = "data/az_countries.csv",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE,
  sep = ","
)

# save the themed plot to disk
ggsave(plot = my_plot, filename = "figure1.png")
# Functions | |
# Print a fixed message.
# Takes no arguments; returns (invisibly, via print()) the string printed.
my_function <- function() {
  print("my first function")
}
# Print `word`, then print `num` multiplied by 5.
#   word: value to print as-is
#   num:  numeric value to multiply by 5
# Returns (invisibly, via the last print()) num * 5.
arg_function <- function(word, num) {
  print(word)
  print(num * 5)
}
# Draw a scatter plot of gdpPercap against year for `data`.
#   data: data frame with `year` and `gdpPercap` columns
# NOTE: this reuses the name `my_plot`, replacing the ggplot object stored
# under that name earlier in the script.
my_plot <- function(data) {
  # a ggplot object is not drawn automatically inside a function, so it is
  # passed to plot() explicitly
  plot(ggplot(data, aes(x = year, y = gdpPercap)) + geom_point())
}
# Draw one gdpPercap-vs-year scatter plot per distinct country in az.countries.
for (cou in unique(az.countries$country)) {
  my_plot(data = az.countries %>% filter(country == cou))
}
# (GitHub page footer: sign up for free to join this conversation on GitHub;
# already have an account? Sign in to comment.)