Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Created November 18, 2013 21:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save psychemedia/7536017 to your computer and use it in GitHub Desktop.
Save psychemedia/7536017 to your computer and use it in GitHub Desktop.
Isle of Wight Employment Stats
========================================================
This is an example of pulling down jobless figures from nomisweb for the Isle of Wight - http://rpubs.com/psychemedia/9133
Provided as a demonstrator for OnTheWight by Tony Hirst, (Computing and Communications Department, The Open University, and the Open Knowledge Foundation).
```{r echo=FALSE, message=FALSE}
#Package loading
list.of.packages <- c("XML", "RCurl", "ggplot2", "xtable")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
library(XML)
library(RCurl)
library(ggplot2)
library(xtable)
```
```{r echo=FALSE, message=FALSE}
#Utility functions
commaReplace = function(node) {
val = xmlValue(node)
ans = gsub(",", "", val)
if(is.na(ans))
val
else
ans
}
comma=function(txt){paste(txt,',',sep='')}
```
```{r echo=FALSE, message=FALSE}
#Total stats loader
#total1, male1, female1
tt=postForm("https://www.nomisweb.co.uk/reports/lmp/la/2038431803/subreports/jsa_time_series/report.aspx", .encoding='utf-8', pivot="total1")
tables <- readHTMLTable(tt,stringsAsFactors = FALSE,elFun=commaReplace ) #encoding = "UTF-8")
nn=gsub('([\n\t\r])','',names(tables$`NULL`))
names(tables$`NULL`)=nn
ud=tables$`NULL`
ud['Isle of Wight']=lapply(ud['Isle of Wight'], function(x) as.integer(x))
ud['South East(%)']=lapply(ud['South East(%)'], function(x) as.numeric(x))
ud['Great Britain(%)']=lapply(ud['Great Britain(%)'], function(x) as.numeric(x))
#We need to create a date. IF we just the 1/MON/YEAR the chart labeling maybe looks better than the possibly more correct lastDayOfMonth/MON/YEAR? Or use middle of month (15th, say?)?
ud['tDate']=lapply(ud['Date'], function(x) as.Date(paste(28,x),"%d %b %Y"))
```
The most recent figures:
```{r results='asis', message=FALSE, echo=FALSE}
print(xtable(ud[nrow(ud),c(1,2,4,5)]),include.rownames=FALSE,type='html')
```
The previous figures:
```{r results='asis', message=FALSE, echo=FALSE}
print(xtable(ud[nrow(ud)-1,c(1,2,4,5)]),include.rownames=FALSE,type='html')
```
Last three months' figures:
```{r results='asis', message=FALSE, echo=FALSE}
df.tmp=ud[(nrow(ud)-2):nrow(ud),c(1,2,4,5)]
print(xtable(df.tmp),include.rownames=FALSE,type='html')
```
Previous year's figures:
```{r results='asis', message=FALSE, echo=FALSE}
df.tmp=ud[(nrow(ud)-14):(nrow(ud)-11),c(1,2,4,5)]
print(xtable(df.tmp),include.rownames=FALSE,type='html')
```
A little bit of machine generated explanatory text:
```{r results='asis', message=FALSE, echo=FALSE, comment='' }
txt="The total number of people claiming Job Seeker's Allowance (JSA) on the Isle of Wight in"
txt=paste(txt,format(ud[nrow(ud),]$tDate,'%B'),'was',ud[nrow(ud),'Isle of Wight'])
txt=comma(txt)
jsa.lm=ud[nrow(ud)-1,'Isle of Wight']
jsa.diff.m=ud[nrow(ud),'Isle of Wight']-jsa.lm
if (jsa.diff.m>0){
txt=paste(txt,'up',jsa.diff.m,'from',jsa.lm)
} else if (jsa.diff.m<0) {
txt=paste(txt,'down',abs(jsa.diff.m),'from',jsa.lm)
} else { txt=paste(txt,'the same as') }
txt=paste(txt,'in',format(ud[nrow(ud)-1,]$tDate,'%B, %Y,'))
txt=paste(txt,'and')
last.yr.row=nrow(ud)-12
jsa.ly=ud[last.yr.row,'Isle of Wight']
jsa.diff.y=ud[nrow(ud),'Isle of Wight']-jsa.ly
if (jsa.diff.y>0){
txt=paste(txt,'up',jsa.diff.y,'from',jsa.ly)
} else if (jsa.diff.y<0) {
txt=paste(txt,'down',abs(jsa.diff.y),'from',jsa.ly)
} else { txt=paste(txt,'the same as') }
txt=paste(txt,'in',format(ud[last.yr.row,]$tDate,'%B, %Y.'))
cat(txt)
```
```{r fig.width=7, fig.height=6, message=FALSE, echo=FALSE}
ud$dy <- unlist(format(ud["tDate"],'%Y'))
ud$dm <- unlist(format(ud["tDate"],'%b'))
ud$dm=factor(ud$dm)
ud$dm<- factor(ud$dm, levels = month.abb)
g=ggplot(ud[ud$dy>2010,])+geom_line(aes_string(x='dm',y='`Isle of Wight`',group='dy',col='dy'))+theme(axis.text.x = element_text(angle = 90, hjust = 1))
g=g+ylab('Isle of Wight JSA claimants')
g
```
```{r fig.width=7, fig.height=6, message=FALSE, echo=FALSE}
library(directlabels)
g=ggplot(ud[ud$dy>2010,],aes_string(x='dm',y='`Isle of Wight`'))+geom_line(aes_string(group='dy',col='dy'))+theme(axis.text.x = element_text(angle = 90, hjust = 1))+scale_x_discrete(expand=c(0, 1.5))
#direct.label(g)
#In the following, new to increase internal viewport so labels don't overflow right
g=g+ylab('Isle of Wight JSA claimants')
direct.label(g,'last.points')
```
If we plot the monthly change we get a better idea for how monthly changes compare with previous years:
```{r fig.width=7, fig.height=6, message=FALSE, echo=FALSE}
ud$l=c(NA, diff(ud$`Isle of Wight`, lag = 1))
g=ggplot(ud[ud$dy>2010,])+geom_line(aes_string(x='dm',y='l',group='dy',col='dy'))+theme(axis.text.x = element_text(angle = 90, hjust = 1))
g=g+ylab('Isle of Wight JSA claimants - monthly change')
print(g)
```
```{r fig.width=7, fig.height=6, message=FALSE, echo=FALSE}
ggplot(ud)+geom_line(aes_string(x='dy',y='`Isle of Wight`',group='dm'))+facet_grid(~dm)+theme(axis.text.x = element_text(angle = 90, hjust = 1))
#reorder(variable, value)
```
```{r fig.width=7, fig.height=6, message=FALSE, echo=FALSE}
ggplot(ud[(nrow(ud)-11):nrow(ud),])+geom_line(aes_string(x='tDate',y='`Isle of Wight`'))+theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
require(reshape)
nomis <- read.csv('http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?geography=1946157281&date=latestMINUS1,latest&select=date_name,geography_name,geography_code,sex_name,item_name,measures_name,obs_value,obs_status_name&measures=20100,20203')
nomis <- read.csv('http://www.nomisweb.co.uk/api/v01/dataset/NM_4_1.data.csv?geography=1946157281&date=latestMINUS1,latest&measures=20100&select=DATE,DATE_NAME,GEOGRAPHY_NAME,AGE_DUR_NAME,SEX_NAME,MEASURES,MEASURES_NAME,OBS_VALUE')
nx=nomis[grep('Claiming',nomis$AGE_DUR_NAME),]
nx[nx$SEX_NAME=='Total,']
nx$AGE_DUR_NAME <- factor(nx$AGE_DUR_NAME, levels = c("Claiming one week or less", "Claiming over 1 and up to 2 weeks", "Claiming over 2 and up to 4 weeks" , "Claiming over 4 and up to 6 weeks" , "Claiming over 6 and up to 8 weeks" ,"Claiming over 8 and up to 13 weeks", "Claiming over 13 and up to 26 weeks","Claiming over 26 and up to 39 weeks","Claiming over 39 and up to 52 weeks", "Claiming over 52 and up to 65 weeks","Claiming over 65 and up to 78 weeks" ,"Claiming over 78 and up to 104 weeks" ,"Claiming over 104 and up to 156 weeks" , "Claiming over 156 and up to 208 weeks","Claiming over 208 and up to 260 weeks" ,"Claiming over 260 weeks" ))
nxxx=cast(nx[,c('DATE','AGE_DUR_NAME','OBS_VALUE')],AGE_DUR_NAME~DATE)
require(stringr)
nnx=nomis[grep('claiming',nomis$AGE_DUR_NAME),]
levels(nnx)=droplevels(nnx)
nnx$ad=str_replace(nnx$AGE_DUR_NAME,'Aged (.*) claiming.*','\\1')
nnx$ad=factor(nnx$ad)
nnx$ad=factor(nnx$ad,levels=c("under 17","17","18","19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60 and over"))
nnx$dur=str_replace(nnx$AGE_DUR_NAME,'.* claiming (.*)','\\1')
nnx$dur=factor(nnx$dur)
nnx$dur=factor(nnx$dur,levels=c("one week or less","over 1 and up to 2 weeks","over 2 and up to 4 weeks","over 4 and up to 6 weeks","over 6 and up to 8 weeks","over 8 and up to 13 weeks","over 13 and up to 26 weeks","over 26 and up to 39 weeks","over 39 and up to 52 weeks","over 52 and up to 65 weeks","over 65 and up to 78 weeks","over 78 and up to 104 weeks","over 104 and up to 156 weeks","over 156 and up to 208 weeks","over 208 and up to 260 weeks","over 260 weeks" ))
ggplot(nnx)+geom_line(aes(x=dur,y=OBS_VALUE,group=ad,col=ad))+facet_wrap(~SEX_NAME+DATE)+theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(nnx[nnx$DATE=='2013-10',])+geom_line(aes(x=dur,y=OBS_VALUE,group=SEX_NAME,col=SEX_NAME))+facet_wrap(~ad)+theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment