Skip to content

Instantly share code, notes, and snippets.

@hdigital
Created May 3, 2016 09:31
Show Gist options
  • Save hdigital/9bdca6a2f605a27d5d28ca90b7752759 to your computer and use it in GitHub Desktop.
Save hdigital/9bdca6a2f605a27d5d28ca90b7752759 to your computer and use it in GitHub Desktop.
Calculate cabinet duration with ParlGov data in R -- two approaches (base R and dplyr)
# Get the ParlGov cabinet data: download the CSV into the working directory
# on the first run and reuse the local copy afterwards.
tbl <- "view_cabinet.csv"

# file.exists() tests the one path directly instead of listing the whole
# working directory and searching the result.
if (!file.exists(tbl)) {
  download.file(
    paste0("http://www.parlgov.org/static/data/development-cp1252/", tbl),
    tbl
  )
}
## Base R approach
cab <- read.csv(tbl, fileEncoding = "cp1252", as.is = TRUE)

# Calculate the duration of each cabinet through a self-merge on cabinet IDs.
# Keep one row per cabinet_id (cab can hold several rows per cabinet, hence
# the de-duplication) with only the columns needed for the calculation.
cab_dur <- cab[
  !duplicated(cab$cabinet_id),
  c("cabinet_id", "previous_cabinet_id", "start_date")
]
cab_dur$start_date <- as.Date(cab_dur$start_date)

# Pair every cabinet (x side) with the cabinet that names it as its
# predecessor (y side). merge() suffixes the clashing start_date columns:
# start_date.x is the cabinet's own start, start_date.y its successor's start.
cab_dur <- merge(
  cab_dur, cab_dur,
  by.x = "cabinet_id", by.y = "previous_cabinet_id"
)

# Subtracting two Dates yields a difftime measured in days.
cab_dur$duration <- cab_dur$start_date.y - cab_dur$start_date.x

# all.x = TRUE keeps cabinets without a recorded successor (duration = NA)
# instead of silently dropping their rows, matching the left join used in the
# dplyr approach.
cab <- merge(
  cab, cab_dur[, c("cabinet_id", "duration")],
  by = "cabinet_id", all.x = TRUE
)
## dplyr approach -- way nicer
library(dplyr)
cab <- read.csv(tbl, fileEncoding = "cp1252", as.is = TRUE)

# Distinct (cabinet_id, previous_cabinet_id, start_date) combinations, with
# the start date parsed as a Date.
cab_dur <- cab %>%
  mutate(start_date = as.Date(start_date)) %>%
  select(cabinet_id, previous_cabinet_id, start_date) %>%
  distinct()

# Self-join: match each cabinet (x side) to the cabinet that lists it as its
# previous cabinet (y side). The clashing start_date columns get suffixes:
# start_date.x is the cabinet's own start, start_date.y its successor's start.
# Subtracting two Dates yields a difftime measured in days.
cab_dur <- cab_dur %>%
  inner_join(cab_dur, by = c("cabinet_id" = "previous_cabinet_id")) %>%
  mutate(duration = start_date.y - start_date.x) %>%
  select(cabinet_id, duration)

# Attach the duration to the full cabinet table. Cabinets without a matching
# successor keep their rows and get duration = NA. The join key is stated
# explicitly so it cannot change silently and no "Joining, by" message is
# printed.
cab <- cab %>% left_join(cab_dur, by = "cabinet_id")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment