public
Last active

catch_fun_v2

  • Download Gist
catch_fun_v2.r
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
# Scrape fish survey results from the Minnesota DNR 'lakefind' report pages.
#
# For every lake identifier in `dows` the report page is downloaded, the
# 'Fish Sampled' heading is located and the catch (abundance) table and the
# length-distribution table are parsed from the text of the page's tables.
#
# Arguments:
#   dows      vector of lake DOW numbers. Each value is pasted into the report
#             URL and, converted to character, names the matching output
#             element. Character input is recommended (NOTE(review): DOW
#             numbers presumably can carry leading zeros - confirm).
#   trace     logical; if TRUE, print '<dow>; <i> of <n>' before each download.
#   rich.out  logical; if TRUE, every successfully parsed lake is reduced to
#             list(date, species), where species is the sorted unique species
#             names of the abundance table. If FALSE (default) the full
#             tables are returned.
#
# Value:
#   A list with one element per entry of `dows`, in the same order. Each
#   element is one of
#     'No survey'        no 'Fish Sampled' heading, no catch table, or a catch
#                        table that reports no fish
#     'massive failure'  the tables were found but could not be parsed
#     list(date, abundance, length)
#                        date is a string, abundance a data.frame with columns
#                        Species, Net, Caught, Ave_wt; length is a data.frame
#                        or the string 'No length data'.
#
# Side effects: prints the elapsed time; emits a warning for every lake that
# ends up as 'massive failure'. Errors raised while downloading a page are
# not caught and abort the run.
catch.fun.v2 <- function(dows, trace = TRUE, rich.out = FALSE) {

  # Fail with a clear message when there is work to do but the parser package
  # is missing (require() would only warn and carry on).
  if (length(dows) > 0 && !requireNamespace('XML', quietly = TRUE)) {
    stop("package 'XML' is required by catch.fun.v2", call. = FALSE)
  }

  strt <- Sys.time()

  # Split the text of one scraped table into its newline-separated cells.
  split.cells <- function(tab.txt) strsplit(tab.txt, '\n')[[1]]

  # Sampling date: everything between the first and the last digit of the
  # 'Fish Sampled' heading.
  parse.date <- function(heading) {
    digits <- gregexpr("[0-9]", heading)[[1]]
    substr(heading, digits[1], digits[length(digits)])
  }

  # Catch table: cells from the 8th onwards are laid out in rows of six, of
  # which columns 1, 2, 3 and 5 are kept.
  parse.abundance <- function(tab.txt) {
    cells <- split.cells(tab.txt)
    if (length(cells) < 8) stop('unexpected abundance table layout')
    dat <- matrix(cells[8:length(cells)], ncol = 6, byrow = TRUE)
    dat <- dat[, c(1:3, 5), drop = FALSE]
    # A species name shorter than 3 characters is treated as blank and takes
    # the nearest non-blank name above it. Forward-filling (rather than
    # copying from the row directly above) also covers runs of several blank
    # rows and leaves a blank first row untouched instead of failing.
    blank <- nchar(as.character(dat[, 1])) < 3
    src <- cummax(ifelse(blank, 0L, seq_along(blank)))
    has.src <- src > 0
    dat[has.src, 1] <- dat[src[has.src], 1]
    dat <- data.frame(dat, stringsAsFactors = FALSE)
    names(dat) <- c('Species', 'Net', 'Caught', 'Ave_wt')
    dat
  }

  # Length table: cells 3 to 11 are the column headers, cells from the 12th
  # onwards are laid out in rows of ten (species followed by nine values).
  parse.lengths <- function(tab.txt) {
    cells <- split.cells(tab.txt)
    if (length(cells) < 12) stop('unexpected length table layout')
    dat <- matrix(cells[12:length(cells)], ncol = 10, byrow = TRUE)
    dat <- data.frame(dat, stringsAsFactors = FALSE)
    names(dat) <- c('Species', cells[3:11])
    dat
  }

  # create object for output
  fish.out <- vector('list', length(dows))
  names(fish.out) <- as.character(dows)

  # Results are stored by position, not by name, so numeric or duplicated
  # DOW numbers cannot grow the list or leave elements unfilled.
  for (i in seq_along(dows)) {
    dow <- dows[[i]]

    if (trace) {
      cat(paste(dow, '; ', i, ' of ', length(dows), sep = ''), '\n')
      flush.console()
    }

    # get raw html
    html <- XML::htmlTreeParse(
      paste(
        'http://www.dnr.state.mn.us/lakefind/showreport.html?downum=',
        dow,
        '&qitem=DOWLKNUM_1',
        sep = ''
      ),
      useInternalNodes = TRUE
    )

    # no page for dow
    check.site <- unlist(XML::xpathApply(html, "//h3", XML::xmlValue))
    sampled <- grep('Fish Sampled*', check.site)
    if (length(sampled) == 0) {
      fish.out[[i]] <- 'No survey'
      next
    }

    # page exists, find survey data if available
    fish.tab <- unlist(XML::xpathApply(html, "//table", XML::xmlValue))
    abun.ind <- grep('Species\nGear*', fish.tab)
    leng.ind <- grep('Species\nNumber*', fish.tab)

    # No survey data: catch table missing altogether, catch table saying so,
    # or the alternative table format that splits into exactly 7 cells.
    # `||` short-circuits, so the table text is only read when it exists.
    no.survey <- length(abun.ind) == 0 ||
      grepl('No fish were collected', fish.tab[abun.ind[1]]) ||
      length(split.cells(fish.tab[abun.ind[1]])) == 7
    if (no.survey) {
      fish.out[[i]] <- 'No survey'
      next
    }

    # Any parsing error marks this lake as failed without stopping the loop.
    fish.out[[i]] <- tryCatch(
      list(
        date = parse.date(check.site[sampled[1]]),
        abundance = parse.abundance(fish.tab[abun.ind[1]]),
        length = if (length(leng.ind) == 0) {
          # length data absent
          'No length data'
        } else {
          parse.lengths(fish.tab[leng.ind[1]])
        }
      ),
      error = function(e) {
        warning(
          'catch.fun.v2: could not parse survey for ', dow, ': ',
          conditionMessage(e),
          call. = FALSE
        )
        'massive failure'
      }
    )
  }

  print(Sys.time() - strt)

  # format output as species only
  if (rich.out) {
    return(lapply(
      fish.out,
      function(x) {
        # status strings ('No survey', 'massive failure') pass through as is
        if (!is.list(x)) {
          x
        } else {
          list(
            date = x[['date']],
            species = sort(unique(x[['abundance']][, 'Species']))
          )
        }
      }
    ))
  }
  fish.out

}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.