Skip to content

Instantly share code, notes, and snippets.

@kurhula
Last active August 29, 2015 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kurhula/cc33c024824cef324bc1 to your computer and use it in GitHub Desktop.
Save kurhula/cc33c024824cef324bc1 to your computer and use it in GitHub Desktop.
library(dplyr)
library(tidyr)
# 0. Workspace
# Deliberately no rm(list = ls()) here: it deleted every object of whoever
# sourced this script and still did not give a clean session (attached
# packages and options survive it). Every variable used below is assigned
# from scratch, so nothing depends on an empty workspace.
# 1. Load Data
# Semicolon-separated file read without a header row. Only rows 1-4 and
# columns 2-5 are used below.
data <- read.csv("input/data.csv", header = FALSE, stringsAsFactors = FALSE,
                 sep = ";", dec = ".")
# 2. Prepare Data
# 2.1. Main Variables
# Returns columns 2-5 of row `row` of `table` as an integer vector. Text that
# cannot be parsed as a number becomes NA (with a coercion warning).
readIntegerRow <- function(table, row) {
  slice(table, row) %>%
    select(2:5) %>%
    unlist() %>%
    as.character() %>%
    as.integer()
}
# Row 2: ward counts (wards[3] is the number of values simulated later on).
wards <- readIntegerRow(data, 2)
# Row 1: census labels (the plotting code uses them as titles/year labels).
census <- readIntegerRow(data, 1)
# Row 3: population totals (populations[3] is the target sum of the simulation).
populations <- readIntegerRow(data, 3)
# Row 4: one cell per column holding a comma-separated list of values.
population <- slice(data, 4) %>% select(2:5)
# 2.2. Data Type Manipulation
# Splits the comma-separated cell in column `index` of `cells` into an
# integer vector.
splitWardCounts <- function(cells, index) {
  strsplit(cells[index] %>% as.character(), ",") %>%
    unlist() %>%
    as.integer()
}
p1 <- splitWardCounts(population, 1)
p2 <- splitWardCounts(population, 2)
p3 <- splitWardCounts(population, 3)
p4 <- splitWardCounts(population, 4)
# 3. Patch Data
# 3.1. Replace Missing Fields
# Simulates the individual values for the third column (the one the caller
# stores in p3).
#
# Reads the globals p1..p4, populations and wards. It draws wards[3] values
# from a normal distribution whose mean, standard deviation and lower/upper
# bounds are taken from all non-missing values in p1..p4, then shifts every
# draw by the same amount so the total matches populations[3].
#
# Returns an integer vector of length wards[3]. The draws are random, so each
# call gives a different result. Because of rounding to whole numbers the sum
# can still differ slightly from populations[3].
# Stops with an error when the Runuran package is not installed.
missingPopulation <- function() {
  # require() only warns when the package is missing, which made the function
  # fail later with "could not find function"; fail right away instead.
  if (!requireNamespace("Runuran", quietly = TRUE)) {
    stop("Package 'Runuran' is required by missingPopulation()", call. = FALSE)
  }
  # p3 is included as in the original; its missing entries are ignored by
  # na.rm = TRUE below.
  known <- unlist(list(p1, p2, p3, p4))
  # Named popMean/popSd so the locals do not shadow mean() and sd().
  popMean <- mean(known, na.rm = TRUE)
  popSd <- sd(known, na.rm = TRUE)
  lowerBound <- min(known, na.rm = TRUE)
  upperBound <- max(known, na.rm = TRUE)
  sumPop <- populations[3]
  numWards <- wards[3]
  draws <- Runuran::urnorm(numWards, mean = popMean, sd = popSd,
                           lb = lowerBound, ub = upperBound)
  # Spread the gap between the target total and the drawn total evenly.
  shifted <- draws + (sumPop - sum(draws)) / numWards
  # Round to the nearest integer. The previous as.integer(round(x, 1))
  # truncated the fraction and pushed the total below sumPop.
  pop3 <- as.integer(round(shifted))
  # The unused `accuracy` local was dropped: it divided by population[3] (a
  # data frame of raw cells) instead of populations[3], and could raise an
  # error before the result was returned.
  pop3
}
# 4. Plot Data
# 4.1. By Year
# Draws four histograms on a 2x2 grid, filled row by row: one for each of the
# global series p1..p4, titled with the matching entry of the global `census`.
# The 2x2 layout stays active on the graphics device afterwards.
plotByYear <- function() {
  panelGrid <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
  layout(panelGrid)
  # Title of panel k: the k-th census entry converted to text.
  titleFor <- function(k) paste(census[k])
  xLabel <- 'Population'
  yLabel <- 'Frequency'
  hist(p1, main = titleFor(1), xlab = xLabel, ylab = yLabel)
  hist(p2, main = titleFor(2), xlab = xLabel, ylab = yLabel)
  hist(p3, main = titleFor(3), xlab = xLabel, ylab = yLabel)
  hist(p4, main = titleFor(4), xlab = xLabel, ylab = yLabel)
}
# 4.2. By Ward
# Draws the global series p1..p4 as lines on one shared plot, each value
# plotted against its position in the series (x axis "Wards", y axis
# "Population").
#
# Each line gets its own colour from the RColorBrewer "Set1" palette and is
# labelled with the matching entry of the global `census`, placed 10 positions
# to the left of the line's last point.
# Stops with an error when the RColorBrewer package is not installed.
plotByWard <- function() {
  # require() only warns when the package is missing; fail right away instead.
  if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
    stop("Package 'RColorBrewer' is required by plotByWard()", call. = FALSE)
  }
  # A 2x2 layout matrix filled with 1s is one figure spanning the whole device.
  layout(matrix(c(1), 2, 2, byrow = TRUE))
  pops <- length(populations)
  colours <- RColorBrewer::brewer.pal(pops, "Set1")
  mypops <- list(p1, p2, p3, p4)
  # Shared y range over all series. na.rm = TRUE matches missingPopulation();
  # without it a single missing value made ylim NA and plot() failed.
  yRange <- range(unlist(mypops), na.rm = TRUE)
  years <- as.character(census)
  # seq_len() instead of 1:pops so an empty `populations` draws nothing.
  for (i in seq_len(pops)) {
    popi <- mypops[[i]]
    if (i == 1) {
      # The first series opens the plot and fixes the axes for the others.
      plot(popi, col = colours[i], type = "l", ylim = yRange,
           xlab = "Wards", ylab = "Population")
    } else {
      lines(popi, col = colours[i])
    }
    lastxval <- length(popi)
    lastyval <- popi[lastxval]
    text(lastxval - 10, lastyval, years[i], col = "black", cex = 0.6)
  }
}
# Run Program
# Replace the global p3 with the simulated values first: both plotting
# functions read p1..p4 from the global environment.
p3 <- missingPopulation()
# One histogram per series on a 2x2 grid.
plotByYear()
# All four series as lines on a single plot.
plotByWard()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment