public
Created

  • Download Gist
catch_fun.r
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
catch.fun<-function(dows,fish.str,net.type='Trap',trace=T,clean=F){
 
require(XML)
 
strt<-Sys.time()
 
#create object for output
fish.out<-data.frame(expand.grid(dows,fish.str),NA,NA)
names(fish.out)<-c('dows','fish','fish.val','result')
 
 
for(dow in dows){
 
if(trace){
cat(paste(dow,'; ',which(dow==dows),' of ',length(dows),sep=''),'\n')
flush.console()
}
 
#get raw html
html<-htmlTreeParse(
paste(
'http://www.dnr.state.mn.us/lakefind/showreport.html?downum=',
dow,
'&qitem=DOWLKNUM_1',
sep=''
),
useInternalNodes=TRUE
)
 
#exception 1, no site for dow or no fish survey
check.site<-unlist(xpathApply(html, "//h3", xmlValue))
if(length(grep('Fish Sampled*',check.site))==0){
fish.out[dows==dow,4]<-'e1'
next
}
 
#survey exists, get survey data
#exception for surveys with only one table
#replace trace with zero
fish.tab<-unlist(xpathApply(html, "//tr", xmlValue))
fish.row<-grep('Species*',fish.tab)[1:2]
if(is.na(fish.row[2])) fish.row[2]<-grep('\nFor more information',fish.tab)
fish.tab<-fish.tab[fish.row[1]:fish.row[2]]
fish.tab<-gsub('trace','0',fish.tab)
 
if(class(try(
 
for(fish in fish.str){
 
#does fish exist in the table?
fish.row<-grep(paste('*',fish,'*',sep=''),fish.tab)
 
#row in output data.frame to fill, logical vector
out.row<-fish.out$dows==dow & fish.out$fish==fish
 
#exception 2, fish survey but spp not found
if(length(fish.row)==0){
fish.out[out.row,3:4]<-c(0,'e2')
next
}
 
#exception 3, fish found, two rows with spp label each row
if(length(fish.row)==2){
fish.val<-strsplit(
grep(net.type,fish.tab[fish.row],value=T),'\n'
)[[1]][3]
fish.out[out.row,3:4]<-c(fish.val,'e3')
next
}
 
check.rows<-fish.tab[c(fish.row,fish.row+1)]
 
check.net<-grepl(net.type,check.rows)
 
#exception 4, fish found, one row with fish label, net type not found
if(sum(check.net)==0){
fish.out[out.row,3:4]<-c(0,'e4')
next
}
 
#exception 5, fish found, one row with fish label,
#net type found in both rows
if(sum(check.net)==2){
fish.val<-strsplit(check.rows[1],'\n')[[1]][3]
fish.out[out.row,3:4]<-c(fish.val,'e5')
next
}
 
final.row<-strsplit(check.rows[check.net],'\n')[[1]]
 
#exception 6, fish found, one row with fish label,
#net type found in one row, row with net is for correct species
#(less than three characters or contains species name)
if(nchar(final.row[1])<3 | final.row[1]==fish){
fish.val<-final.row[3]
fish.out[out.row,3:4]<-c(fish.val,'e6')
next
}
 
#exception 7, fish found, one row with fish label,
#net type found in one row
fish.out[out.row,3:4]<-c(0,'e7')
 
}
 
))=='try-error'){
 
#exception 8, unknown
fish.out[fish.out$dows==dow,4]<-'e8'
 
}
 
}
 
fish.out<-split(fish.out,fish.out$fish)
 
print(Sys.time()-strt)
if(clean) return(lapply(fish.out,function(x) x[!is.na(x[,3]),-c(2,4)]))
 
fish.out
 
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.