Skip to content

Instantly share code, notes, and snippets.

@jonrobinson2
Last active February 14, 2016 18:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonrobinson2/fe15f27b9e413dbdad45 to your computer and use it in GitHub Desktop.
Save jonrobinson2/fe15f27b9e413dbdad45 to your computer and use it in GitHub Desktop.
require(rvest)
require(lubridate)
require(stringr)
require(ggplot2)
# Age in completed years between two dates.
#
# Both arguments are converted with as.POSIXlt(), so anything that function
# accepts (Date, POSIXct, date strings) may be supplied. The computation is
# vectorised: the calendar-year difference is taken first and one year is
# subtracted wherever the month/day of `to` falls before that of `from`.
#
# from: start date(s), e.g. dates of birth.
# to:   end date(s) at which the age is evaluated.
# Returns a numeric vector of whole years.
age <- function(from, to) {
  start <- as.POSIXlt(from)
  end <- as.POSIXlt(to)
  year_gap <- end$year - start$year
  # TRUE where the anniversary has not yet been reached in the year of `to`
  before_anniversary <- end$mon < start$mon |
    (end$mon == start$mon & end$mday < start$mday)
  ifelse(before_anniversary, year_gap - 1, year_gap)
}
# Collect the inmate IDs offered by the listing page.
#
# Every attribute of every <option> element on the page is gathered and
# coerced to numeric; only the values that are valid numbers are kept. These
# numbers are later substituted into the detail-page URL as its ID parameter.
inmate_list_url <- "http://apps.hcso.org/Inmates.aspx"
option_attrs <- inmate_list_url %>%
  read_html() %>%
  html_nodes("option") %>%
  html_attrs() %>%
  unlist()
# Attribute values that are not numbers coerce to NA. That is expected here,
# so the coercion warning is silenced and the NAs are dropped on the next line.
prisoner_id <- suppressWarnings(as.numeric(option_attrs))
prisoner_id <- prisoner_id[!is.na(prisoner_id)]
# Read one labelled field from a parsed inmate-detail page.
#
# Finds the first element whose id equals `id`, takes its `value` attribute,
# splits that text on ":" and returns the second piece with surrounding
# whitespace trimmed. The result is NA when the text contains no ":".
inmate_field <- function(page, id) {
  raw_value <- page %>%
    html_node(xpath = sprintf('//*[@id = "%s"]', id)) %>%
    html_attr(name = "value")
  str_trim(unlist(strsplit(raw_value, ":"))[2])
}

# Scrape race, sex and date of birth for every ID in `prisoner_id`.
#
# Each detail page is downloaded and parsed once; an ID whose page cannot be
# fetched is reported and skipped. Rows are collected in a preallocated list
# and bound together after the loop instead of growing `dat` on every
# iteration. The elapsed time of the scrape is reported via system.time().
rows <- vector("list", length(prisoner_id))
system.time({
  for (i in seq_along(prisoner_id)) {
    pr <- prisoner_id[[i]]
    # paste0()/message() would render a numeric such as 100000 as "1e+05";
    # format it explicitly so the URL always carries the plain digits.
    pr_text <- format(pr, scientific = FALSE, trim = TRUE)
    message(pr_text)
    prison <- paste0("http://apps.hcso.org/InmateDetail.aspx?ID=", pr_text)
    page <- tryCatch(
      read_html(prison),
      error = function(e) {
        message(conditionMessage(e))
        message("SKIPPING")
        NULL
      }
    )
    if (is.null(page)) {
      next
    }
    rows[[i]] <- data.frame(
      race = inmate_field(page, "lbRace"),
      sex = inmate_field(page, "lbSex"),
      dob = inmate_field(page, "lbDob"),
      pr = pr,
      stringsAsFactors = FALSE
    )
  }
})
# Skipped IDs leave NULL slots behind; drop them before binding.
rows <- Filter(Negate(is.null), rows)
dat <- if (length(rows) > 0) do.call(rbind, rows) else data.frame()
# Convert the scraped date-of-birth text to dates and derive each age.
#
# The scraped `dob` values are "/"-separated month/day/year strings. mdy()
# parses that form directly, including single-digit months and days, so no
# manual zero-padding is needed, and missing values simply become NA instead
# of aborting the script. The guard keeps an empty `dat` (nothing scraped)
# from raising an error.
if (nrow(dat) > 0) {
  dat$dob <- mdy(dat$dob)
  # Pass today's Date as-is: routing it through as.POSIXct() makes it midnight
  # UTC, which age() may then read as the previous calendar day in time zones
  # behind UTC.
  dat$age <- age(from = dat$dob, to = Sys.Date())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment