Skip to content

Instantly share code, notes, and snippets.

@rmaia
Created June 24, 2016 23:01
Show Gist options
  • Save rmaia/7ee6a8d1b7978e1b96a44935fb3c31fb to your computer and use it in GitHub Desktop.
Save rmaia/7ee6a8d1b7978e1b96a44935fb3c31fb to your computer and use it in GitHub Desktop.
require(XML)
#' Scrape species search results from the BirdLife datazone
#'
#' Requests the BirdLife datazone search page for `searchterm`, keeps the
#' lines that link to a species factsheet, and strips the surrounding HTML so
#' that each hit becomes one row of a character matrix.
#'
#' @param searchterm A single, non-missing search term. It is URL-encoded
#'   before being appended to the query string, so terms containing spaces
#'   or reserved characters produce a valid request.
#' @return A character matrix with one row per hit. For result lines in the
#'   expected `<li><a href="...">Common <i>Species</i> (STATUS)</a></li>`
#'   layout the columns are: factsheet URL (with "/additional" appended),
#'   common name, species name, IUCN status. A search with no hits returns a
#'   0-row, 4-column character matrix instead of raising an error.
scrapeBirdlife <- function(searchterm) {
  if (length(searchterm) != 1L || is.na(searchterm)) {
    stop("`searchterm` must be a single non-missing value", call. = FALSE)
  }

  base_url <- "http://www.birdlife.org/datazone/"
  # reserved = TRUE also escapes characters such as '&' and '=' that would
  # otherwise be read as query-string syntax.
  query <- utils::URLencode(as.character(searchterm), reserved = TRUE)
  webaddress <- paste0(
    base_url, "sowbsearchresults.php?a=ns&SearchTerms=", query
  )
  page <- readLines(webaddress)

  # Keep only the result lines: web address, common name, species name,
  # IUCN status.
  hits <- grep("species/factsheet", page, fixed = TRUE, value = TRUE)
  if (length(hits) == 0L) {
    # rbind() over an empty list yields NULL, which cannot be indexed as a
    # matrix below; return an explicitly empty table instead.
    return(matrix(character(0), nrow = 0L, ncol = 4L))
  }

  # Replace the HTML around each field with a "--" separator.
  hits <- gsub("<li><a href=\"", "", hits, fixed = TRUE)
  hits <- gsub("\">", "--", hits, fixed = TRUE)
  hits <- gsub(" <i>", "--", hits, fixed = TRUE)
  hits <- gsub("</i> (", "--", hits, fixed = TRUE)
  hits <- gsub("\\)<.a><.li>", "", hits)

  # Make it a table: one row per hit, one column per field.
  out <- do.call(rbind, strsplit(hits, "--", fixed = TRUE))
  out[, 1] <- paste0(base_url, out[, 1], "/additional")
  out
}
# EXAMPLE
# Runs a search for the term 'Sturnidae'. scrapeBirdlife() reads the results
# page over HTTP, so this line needs network access whenever the file is run.
scrapeBirdlife('Sturnidae')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment