The scrapenames() function in taxize finds scientific names in a web page, a file, or a text string using the Global Names Recognition and Discovery (GNRD) web service.
Install the development version from GitHub to get the latest fixes to the function:
devtools::install_github("ropensci/scrapenames")
Load the library
library("taxize")
res <- scrapenames(url = 'http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0080498')
Detailed metadata on the response:
res$meta
## $token_url
## [1] "http://gnrd.globalnames.org/name_finder.json?token=J3YNA1xETsKykQeQOjFmJg"
##
## $input_url
## [1] "http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0080498"
##
## $file
## [1] ""
##
## $status
## [1] 200
##
## $engines
## [1] "TaxonFinder" "NetiNeti"
##
## $unique
## [1] FALSE
##
## $verbatim
## [1] TRUE
##
## $english
## [1] TRUE
##
## $execution_time
## $execution_time$find_names_duration
## [1] 5.184621
##
## $execution_time$total_duration
## [1] 11.38191
##
##
## $agent
## $agent$code
## [1] "200"
##
## $agent$content_type
## [1] "text/html;charset=UTF-8"
##
## $agent$filename
## [1] "article.html_id=10.1371_journal.pone.0080498"
##
##
## $created
## [1] "2015-08-06T00:37:42Z"
##
## $total
## [1] 362
And the data itself:
head(res$data)
## verbatim scientificName offsetStart offsetEnd
## 1 Cyatta abscondita Cyatta abscondita 12 28
## 2 Scopus Scopus 2786 2791
## 3 Cyatta abscondita Cyatta abscondita 3036 3052
## 4 Curitiba Curitiba 4489 4496
## 5 Paulista Paulista 4938 4945
## 6 Cyatta abscondita Cyatta abscondita 5440 5456
## identifiedName
## 1 Cyatta abscondita
## 2 Scopus
## 3 Cyatta abscondita
## 4 Curitiba
## 5 Paulista
## 6 Cyatta abscondita
url <- 'http://www.plosone.org/article/fetchObject.action?uri=info%3Adoi%2F10.1371%2Fjournal.pone.0058268&representation=PDF'
res2 <- scrapenames(url = url)
head(res2$data)
## verbatim scientificName offsetStart offsetEnd
## 1 Xylaria Xylaria 24 30
## 2 Dendrobium precious Dendrobium precious 37 56
## 3 Petrini Petrini 1495 1501
## 4 Petrini Petrini 1505 1511
## 5 Dendrobium Dendrobium 2027 2036
## 6 Xylariaceae, Xylariaceae 2967 2978
## identifiedName
## 1 Xylaria
## 2 Dendrobium precious
## 3 Petrini
## 4 Petrini
## 5 Dendrobium
## 6 Xylariaceae
Contents of the file
speciesfile <- system.file("examples", "species.txt", package = "taxize")
readLines(speciesfile)
## [1] "Achnatherum eminens" "Achnatherum inebrians"
## [3] "Achnatherum lettermanii" "Achnatherum lobatum"
## [5] "Achnatherum nelsonii" "Achnatherum pinetorum"
## [7] "Achnatherum purpurascens" "Achnatherum robustum"
## [9] "Achnatherum sibiricum" "Achnatherum speciosum"
Find names in the file (this is too easy, but you can imagine files with other text mixed in with the names)
res3 <- scrapenames(file = speciesfile)
head(res3$data)
## verbatim scientificName offsetStart offsetEnd
## 1 Achnatherum eminens Achnatherum eminens 0 18
## 2 Achnatherum inebrians Achnatherum inebrians 20 40
## 3 Achnatherum lettermanii Achnatherum lettermanii 42 64
## 4 Achnatherum lobatum Achnatherum lobatum 66 84
## 5 Achnatherum nelsonii Achnatherum nelsonii 86 105
## 6 Achnatherum pinetorum Achnatherum pinetorum 107 127
## identifiedName
## 1 Achnatherum eminens
## 2 Achnatherum inebrians
## 3 Achnatherum lettermanii
## 4 Achnatherum lobatum
## 5 Achnatherum nelsonii
## 6 Achnatherum pinetorum
res4 <- scrapenames(text = 'A spider named Pardosa moesta Banks, 1892')
head(res4$data)
## verbatim scientificName offsetStart offsetEnd identifiedName
## 1 Pardosa moesta Pardosa moesta 15 28 Pardosa moesta