Created
January 21, 2013 23:13
-
-
Save fawda123/4590405 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
catch.fun<-function(dows,fish.str,net.type='Trap',trace=T,clean=F){ | |
require(XML) | |
strt<-Sys.time() | |
#create object for output | |
fish.out<-data.frame(expand.grid(dows,fish.str),NA,NA) | |
names(fish.out)<-c('dows','fish','fish.val','result') | |
for(dow in dows){ | |
if(trace){ | |
cat(paste(dow,'; ',which(dow==dows),' of ',length(dows),sep=''),'\n') | |
flush.console() | |
} | |
#get raw html | |
html<-htmlTreeParse( | |
paste( | |
'http://www.dnr.state.mn.us/lakefind/showreport.html?downum=', | |
dow, | |
'&qitem=DOWLKNUM_1', | |
sep='' | |
), | |
useInternalNodes=TRUE | |
) | |
#exception 1, no site for dow or no fish survey | |
check.site<-unlist(xpathApply(html, "//h3", xmlValue)) | |
if(length(grep('Fish Sampled*',check.site))==0){ | |
fish.out[dows==dow,4]<-'e1' | |
next | |
} | |
#survey exists, get survey data | |
#exception for surveys with only one table | |
#replace trace with zero | |
fish.tab<-unlist(xpathApply(html, "//tr", xmlValue)) | |
fish.row<-grep('Species*',fish.tab)[1:2] | |
if(is.na(fish.row[2])) fish.row[2]<-grep('\nFor more information',fish.tab) | |
fish.tab<-fish.tab[fish.row[1]:fish.row[2]] | |
fish.tab<-gsub('trace','0',fish.tab) | |
if(class(try( | |
for(fish in fish.str){ | |
#does fish exist in the table? | |
fish.row<-grep(paste('*',fish,'*',sep=''),fish.tab) | |
#row in output data.frame to fill, logical vector | |
out.row<-fish.out$dows==dow & fish.out$fish==fish | |
#exception 2, fish survey but spp not found | |
if(length(fish.row)==0){ | |
fish.out[out.row,3:4]<-c(0,'e2') | |
next | |
} | |
#exception 3, fish found, two rows with spp label each row | |
if(length(fish.row)==2){ | |
fish.val<-strsplit( | |
grep(net.type,fish.tab[fish.row],value=T),'\n' | |
)[[1]][3] | |
fish.out[out.row,3:4]<-c(fish.val,'e3') | |
next | |
} | |
check.rows<-fish.tab[c(fish.row,fish.row+1)] | |
check.net<-grepl(net.type,check.rows) | |
#exception 4, fish found, one row with fish label, net type not found | |
if(sum(check.net)==0){ | |
fish.out[out.row,3:4]<-c(0,'e4') | |
next | |
} | |
#exception 5, fish found, one row with fish label, | |
#net type found in both rows | |
if(sum(check.net)==2){ | |
fish.val<-strsplit(check.rows[1],'\n')[[1]][3] | |
fish.out[out.row,3:4]<-c(fish.val,'e5') | |
next | |
} | |
final.row<-strsplit(check.rows[check.net],'\n')[[1]] | |
#exception 6, fish found, one row with fish label, | |
#net type found in one row, row with net is for correct species | |
#(less than three characters or contains species name) | |
if(nchar(final.row[1])<3 | final.row[1]==fish){ | |
fish.val<-final.row[3] | |
fish.out[out.row,3:4]<-c(fish.val,'e6') | |
next | |
} | |
#exception 7, fish found, one row with fish label, | |
#net type found in one row | |
fish.out[out.row,3:4]<-c(0,'e7') | |
} | |
))=='try-error'){ | |
#exception 8, unknown | |
fish.out[fish.out$dows==dow,4]<-'e8' | |
} | |
} | |
fish.out<-split(fish.out,fish.out$fish) | |
print(Sys.time()-strt) | |
if(clean) return(lapply(fish.out,function(x) x[!is.na(x[,3]),-c(2,4)])) | |
fish.out | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment