Last active
December 13, 2022 08:13
-
-
Save vjrj/60b68a68af76bfaa6cd54c1a787d88b7 to your computer and use it in GitHub Desktop.
A node parse-transform utility to remove authors from scientificNames in GBIF Backbone to use in ALA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// DEPRECATED
// Use: https://github.com/living-atlases/gbif-taxonomy-for-la
//
const parse = require("csv-parse/lib/es5");
const transform = require('stream-transform');
const fs = require('fs');

// Input: the original taxon dump, read as a stream so the file is never
// held in memory as a whole.
const readStream = fs.createReadStream("./gbif-backbone/Taxon.tsv.orig");

// The input is tab-separated. Quote handling is turned off (quote: null) so
// that quote characters occurring inside a field are kept as plain data.
const parserOptions = {
  delimiter: '\t',
  quote: null
};
const parser = parse(parserOptions);
// True until the first record (the header row) has passed through.
let atFirstLine = true;

/**
 * Computes the author-free scientific name for one parsed row.
 *
 * Column 5 holds the scientific name, column 6 the authorship and column 7
 * the canonical name (0-based indexes, as used by the original script).
 *
 * - No authorship: the scientific name is returned untouched.
 * - Authorship and canonical name present: the canonical name is returned.
 * - Authorship but no canonical name: everything from the last occurrence of
 *   " <authorship>" onwards is cut off the scientific name; if the authorship
 *   does not occur in it, the scientific name is returned untouched.
 *
 * @param {string[]} record - One parsed TSV row.
 * @returns {string} The value to store in column 5.
 */
function stripAuthorship(record) {
  const scientificName = record[5];
  const authorship = record[6];
  const canonicalName = record[7];

  if (authorship.length === 0) {
    return scientificName;
  }
  if (canonicalName.length > 0) {
    return canonicalName;
  }
  // No canonicalName, so try to remove the author from the scientific name.
  const pos = scientificName.lastIndexOf(' ' + authorship);
  return pos === -1 ? scientificName : scientificName.slice(0, pos);
}

/**
 * Stream transformer: rewrites column 5 of every data row with the
 * author-free name and serialises the row back to one tab-separated line.
 * The header row is not dropped; it is emitted unchanged.
 */
const transformer = transform(function (record) {
  if (atFirstLine) {
    // Header row: leave the column names as they are.
    atFirstLine = false;
  } else {
    record[5] = stripAuthorship(record);
  }
  return record.join('\t') + '\n';
});
// Once the input file is open, wire the pipeline:
// file -> TSV parser -> author-stripping transformer -> stdout.
readStream.on('open', function () {
  readStream.pipe(parser).pipe(transformer).pipe(process.stdout);
});

// fs.ReadStream is a Readable stream and has no end() method, so calling it
// here would throw a TypeError that hides the real failure. Report the error
// on stderr and mark the process as failed instead.
readStream.on('error', function (err) {
  console.error(`Error reading input file: ${err.message}`);
  process.exitCode = 1;
});
@rpfigueira lucky you! So I have to reindex my personal index of errors ;-)
Back to this. I updated my gist following @rpfigueira's comment.
Before:
nameindexer --testSearch "Methanobrevibacter ruminantium"
(...)
Search for name: Methanobrevibacter ruminantium
ID: 1000111
GUID: 1000111
Classification: "Balch & Wolfe, 1981 (Smith & Hungate, 1958)",Archaea,Euryarchaeota,Methanobacteria,Methanobacteriales,Methanobacteriaceae,Methanobrevibacter
Scientific name: Methanobrevibacter ruminantium (Smith & Hungate, 1958) Balch & Wolfe, 1981
Authorship: Balch & Wolfe, 1981 (Smith & Hungate, 1958)
Rank: SPECIES
Synonym: null
Match type: exactMatch
and now:
nameindexer --testSearch "Methanobrevibacter ruminantium"
(...)
Search for name: Methanobrevibacter ruminantium
ID: 1000111
GUID: 1000111
Classification: "Balch & Wolfe, 1981 (Smith & Hungate, 1958)",Archaea,Euryarchaeota,Methanobacteria,Methanobacteriales,Methanobacteriaceae,Methanobrevibacter
Scientific name: Methanobrevibacter ruminantium
Authorship: Balch & Wolfe, 1981 (Smith & Hungate, 1958)
Rank: SPECIES
Synonym: null
Match type: exactMatch
Thanks!
The issue reported by Tim: gbif/checklistbank#100
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for alerting me @rpfigueira
Looks like combination authors and basionym authors are swapped in the authorship field, but they are correct in the scientificName field... I'll log an issue.