Skip to content

Instantly share code, notes, and snippets.

@fawda123
Created January 21, 2013 23:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fawda123/4590405 to your computer and use it in GitHub Desktop.
Save fawda123/4590405 to your computer and use it in GitHub Desktop.
catch.fun<-function(dows,fish.str,net.type='Trap',trace=T,clean=F){
require(XML)
strt<-Sys.time()
#create object for output
fish.out<-data.frame(expand.grid(dows,fish.str),NA,NA)
names(fish.out)<-c('dows','fish','fish.val','result')
for(dow in dows){
if(trace){
cat(paste(dow,'; ',which(dow==dows),' of ',length(dows),sep=''),'\n')
flush.console()
}
#get raw html
html<-htmlTreeParse(
paste(
'http://www.dnr.state.mn.us/lakefind/showreport.html?downum=',
dow,
'&qitem=DOWLKNUM_1',
sep=''
),
useInternalNodes=TRUE
)
#exception 1, no site for dow or no fish survey
check.site<-unlist(xpathApply(html, "//h3", xmlValue))
if(length(grep('Fish Sampled*',check.site))==0){
fish.out[dows==dow,4]<-'e1'
next
}
#survey exists, get survey data
#exception for surveys with only one table
#replace trace with zero
fish.tab<-unlist(xpathApply(html, "//tr", xmlValue))
fish.row<-grep('Species*',fish.tab)[1:2]
if(is.na(fish.row[2])) fish.row[2]<-grep('\nFor more information',fish.tab)
fish.tab<-fish.tab[fish.row[1]:fish.row[2]]
fish.tab<-gsub('trace','0',fish.tab)
if(class(try(
for(fish in fish.str){
#does fish exist in the table?
fish.row<-grep(paste('*',fish,'*',sep=''),fish.tab)
#row in output data.frame to fill, logical vector
out.row<-fish.out$dows==dow & fish.out$fish==fish
#exception 2, fish survey but spp not found
if(length(fish.row)==0){
fish.out[out.row,3:4]<-c(0,'e2')
next
}
#exception 3, fish found, two rows with spp label each row
if(length(fish.row)==2){
fish.val<-strsplit(
grep(net.type,fish.tab[fish.row],value=T),'\n'
)[[1]][3]
fish.out[out.row,3:4]<-c(fish.val,'e3')
next
}
check.rows<-fish.tab[c(fish.row,fish.row+1)]
check.net<-grepl(net.type,check.rows)
#exception 4, fish found, one row with fish label, net type not found
if(sum(check.net)==0){
fish.out[out.row,3:4]<-c(0,'e4')
next
}
#exception 5, fish found, one row with fish label,
#net type found in both rows
if(sum(check.net)==2){
fish.val<-strsplit(check.rows[1],'\n')[[1]][3]
fish.out[out.row,3:4]<-c(fish.val,'e5')
next
}
final.row<-strsplit(check.rows[check.net],'\n')[[1]]
#exception 6, fish found, one row with fish label,
#net type found in one row, row with net is for correct species
#(less than three characters or contains species name)
if(nchar(final.row[1])<3 | final.row[1]==fish){
fish.val<-final.row[3]
fish.out[out.row,3:4]<-c(fish.val,'e6')
next
}
#exception 7, fish found, one row with fish label,
#net type found in one row
fish.out[out.row,3:4]<-c(0,'e7')
}
))=='try-error'){
#exception 8, unknown
fish.out[fish.out$dows==dow,4]<-'e8'
}
}
fish.out<-split(fish.out,fish.out$fish)
print(Sys.time()-strt)
if(clean) return(lapply(fish.out,function(x) x[!is.na(x[,3]),-c(2,4)]))
fish.out
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment