Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active May 24, 2016 03:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/aa148f7c171622470411264226e552fd to your computer and use it in GitHub Desktop.
Save TonyLadson/aa148f7c171622470411264226e552fd to your computer and use it in GitHub Desktop.
#remove(list = objects())
library(stringr)
library(dplyr)
library(ggplot2)
library(devtools)
library(readr)
library(repmis)
# ggplot theme
MyTheme = theme_bw() +
theme(
panel.background = element_rect(fill="gray98"),
axis.title.x = element_text(colour="grey20", size=14, margin=margin(20,0,0,0)),
axis.text.x = element_text(colour="grey20",size=12),
axis.title.y = element_text(colour="grey20",size=14, margin = margin(0,20,0,0)),
axis.text.y = element_text(colour="grey20",size=12),
legend.title = element_text(colour="grey20",size=12),
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left
# Read in urban data
# my.path <- "/Users/anthonyladson/Dropbox/Projects/N580-ARR-B4C2/Analysis"
# my.file <- "EIA-losses.txt"
# fname <- str_c(my.path, my.file, sep = '/' )
# fname
eia_losses <- source_data(url = 'https://dl.dropboxusercontent.com/u/10963448/EIA-losses.txt', sep = "\t", skip = 2)
eia_losses <- eia_losses %>%
rename(IL = `Initial loss estimate (mm)`)
glimpse(eia_losses, width = 80)
# clean up the IL column
# work on converting to numeric
#
# Catchment State Region IL Reference
# (chr) (chr) (chr) (chr) (chr)
# 1 Powells Creek NSW East Coast 2.6 to 2.9 Phillips et al. (2014)
# 2 Giralang ACT Murray Basin 1.3 to 1.6 Phillips et al. (2014)
# function to get the average of the numbers before and after the 'to'
ConvertToNum <- function(x){
x1 <- str_extract_all(x, '[\\d.]+') # extract numbers
f <- function(x){
mean(as.numeric(x))
}
sapply(x1, f)
}
eia_losses <- eia_losses %>%
mutate(IL = ConvertToNum(IL))
# Read in rural data
rural_losses <- source_data(url = 'https://dl.dropboxusercontent.com/u/10963448/Rural-losses.txt',
sep = "\t",
skip = 2,
colClasses = c(rep('character', 5), rep('numeric', 4), 'character' ))
glimpse(rural_losses, width = 80)
hist(rural_losses$IL_median)
plot(density(rural_losses$IL_median))
# comparison
IL.urban <- data_frame(IL = eia_losses$IL, catchment = rep('urban', times = nrow(eia_losses)))
IL.rural <- data_frame(IL = rural_losses$IL, catchment = rep('rural', times = nrow(rural_losses)))
IL <- bind_rows(IL.urban, IL.rural)
# Function to calculate the mode
# http://stackoverflow.com/questions/2547402/is-there-a-built-in-function-for-finding-the-mode
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
mean(IL$IL[IL$catchment == 'rural']) #32.344
median(IL$IL[IL$catchment == 'rural']) #29
Mode(IL$IL[IL$catchment == 'rural']) #25
sd(IL$IL[IL$catchment == 'rural']) #16.87
mean(IL$IL[IL$catchment == 'urban']) #1.09
median(IL$IL[IL$catchment == 'urban']) #0.9
Mode(IL$IL[IL$catchment == 'urban']) # 0
sd(IL$IL[IL$catchment == 'urban']) #1.17
ggplot(IL, aes(x=IL)) +
geom_density(aes(group=catchment, fill = catchment), alpha = 0.3, bw = 'SJ', adjust = 3 ) +
MyTheme +
ylab('Density') +
xlab('Initial loss (mm)') +
MyTheme +
scale_fill_manual(name = "",values=c("green","grey"), labels = c("Rural", "Urban")) +
theme(legend.title = element_text(colour="black",face="bold")) +
theme(legend.position = c(0.9, 0.9))
# plot urban and rural separately (to allow different bandwidths on density estimates
ggplot(data = NULL) +
geom_density(data = subset(IL, subset = catchment == 'urban'), aes(x = IL, fill = catchment), alpha = 0.3, bw = 'SJ', adjust = 4) +
geom_density(data = subset(IL, subset = catchment == 'rural'), aes(x = IL, fill = catchment), alpha = 0.3, bw = 'SJ', adjust = 1) +
MyTheme +
ylab('Density') +
xlab('Initial loss (mm)') +
MyTheme +
scale_fill_manual(name = "",values=c("green","grey"), labels = c("Rural", "Urban")) +
theme(legend.title = element_text(colour="black",face="bold")) +
theme(legend.position = c(0.9, 0.9))
@TonyLadson
Copy link
Author

TonyLadson commented May 23, 2016

Updated to calculate mean, median and mode of urban and rural losses

@TonyLadson
Copy link
Author

Updated to use different band widths for rural and urban

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment