Skip to content

Instantly share code, notes, and snippets.

@fawda123
Last active December 13, 2015 21:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fawda123/4978092 to your computer and use it in GitHub Desktop.
Save fawda123/4978092 to your computer and use it in GitHub Desktop.
catch_fun_v2
catch.fun.v2<-function(dows,trace=T,rich.out=F){
require(XML)
strt<-Sys.time()
#create object for output
fish.out<-vector('list',length(dows))
names(fish.out)<-dows
for(dow in dows){
if(trace){
cat(paste(dow,'; ',which(dow==dows),' of ',length(dows),sep=''),'\n')
flush.console()
}
#get raw html
html<-htmlTreeParse(
paste(
'http://www.dnr.state.mn.us/lakefind/showreport.html?downum=',
dow,
'&qitem=DOWLKNUM_1',
sep=''
),
useInternalNodes=TRUE
)
#no page for dow
check.site<-unlist(xpathApply(html, "//h3", xmlValue))
if(length(grep('Fish Sampled*',check.site))==0){
fish.out[dow]<-'No survey'
next
}
#page exists, find survey data if available
fish.tab<-unlist(xpathApply(html, "//table", xmlValue))
abun.ind<-grep('Species\nGear*',fish.tab)
leng.ind<-grep('Species\nNumber*',fish.tab)
#no survey data
if(grepl('No fish were collected',fish.tab[abun.ind])){
fish.out[dow]<-'No survey'
next
}
#no survey data, different table format
if(length(strsplit(fish.tab[abun.ind],'\n')[[1]])==7){
fish.out[dow]<-'No survey'
next
}
if(class(try({
smp.date<-{
tmp<-check.site[grep('Fish Sampled*',check.site)]
strs<-gregexpr("[0-9]",tmp)[[1]]
substr(tmp,strs[1],strs[length(strs)])
}
abun.tab<-{
tmp<-fish.tab[abun.ind]
tmp.prs<-strsplit(tmp,'\n')[[1]]
tmp.dat<-matrix(tmp.prs[8:length(tmp.prs)],ncol=6,byrow=T)[,c(1:3,5),drop=F]
blank.spp<-nchar(as.character(tmp.dat[,1]))<3
tmp.dat[blank.spp,1]<-tmp.dat[which(blank.spp)-1,1]
tmp.dat<-data.frame(tmp.dat,stringsAsFactors=F)
names(tmp.dat)<-c('Species','Net','Caught','Ave_wt')
tmp.dat
}
#length data absent
if(length(leng.ind)==0) leng.tab<-'No length data'
else{
leng.tab<-{
tmp<-fish.tab[leng.ind]
tmp.prs<-strsplit(tmp,'\n')[[1]]
tmp.dat<-matrix(tmp.prs[12:length(tmp.prs)],ncol=10,byrow=T)[,,drop=F]
tmp.dat<-data.frame(tmp.dat,stringsAsFactors=F)
names(tmp.dat)<-c('Species',tmp.prs[3:11])
tmp.dat
}
}
}))=='try-error'){
fish.out[[dow]]<-'massive failure'
next
}
fish.out[[dow]]<-list(date=smp.date,abundance=abun.tab,length=leng.tab)
}
print(Sys.time()-strt)
#format output as species only
if(rich.out)
return(lapply(
fish.out,
function(x){
if(length(x)==1) x
else list(date=x[[1]],species=sort(unique(x[[2]][,'Species'])))
}
))
fish.out
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment