Last active
December 13, 2022 08:13
-
-
Save vjrj/60b68a68af76bfaa6cd54c1a787d88b7 to your computer and use it in GitHub Desktop.
A node parse-transform utility to remove authors from scientificNames in GBIF Backbone to use in ALA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// DEPRECATED
// Use: https://github.com/living-atlases/gbif-taxonomy-for-la
//
// Third-party streaming helpers: csv-parse turns the raw text into one array
// of column values per row; stream-transform applies a callback to each row.
const parse = require("csv-parse/lib/es5");
const transform = require('stream-transform');
const fs = require('fs');

// Input file, read as a stream so the whole file is never held in memory.
const readStream = fs.createReadStream("./gbif-backbone/Taxon.tsv.orig");

// Tab-delimited input. `quote: null` turns quote handling off, so a literal
// `"` inside a field is kept as plain data instead of opening a quoted field.
const parser = parse({
  quote: null,
  delimiter: '\t'
});
// Column positions read and written by the transformer in each parsed row.
const SCI_NAME_COLUMN = 5;
const AUTHOR_COLUMN = 6;
const CANONICAL_NAME_COLUMN = 7;

/**
 * Strip the author from the scientific name of one row, modifying it in place.
 *
 * When the author column is empty (or absent) the row is left untouched.
 * Otherwise the scientific name is replaced by the canonical name when one is
 * present; failing that, everything from the last occurrence of
 * " <author>" onwards is cut off the scientific name.
 *
 * @param {string[]} record - Column values of one row.
 * @returns {string[]} The same array that was passed in.
 */
function removeAuthor(record) {
  const author = record[AUTHOR_COLUMN];
  if (!author) {
    // Empty or missing author column: nothing to strip.
    return record;
  }

  const canonicalName = record[CANONICAL_NAME_COLUMN];
  if (canonicalName) {
    record[SCI_NAME_COLUMN] = canonicalName;
    return record;
  }

  // No canonicalName, so try to remove the author from sciName if it's there.
  // A missing sciName column is treated as empty rather than throwing.
  const sciName = record[SCI_NAME_COLUMN] || '';
  const pos = sciName.lastIndexOf(' ' + author);
  if (pos !== -1) {
    record[SCI_NAME_COLUMN] = sciName.slice(0, pos);
  }
  return record;
}

// True until the first row has been seen; that row is emitted unchanged.
let atFirstLine = true;

// Re-serialises every row as a tab-separated line, removing the author from
// the scientific name of every row except the first.
const transformer = transform(function(record) {
  if (atFirstLine) {
    // we skip the first line
    atFirstLine = false;
  } else {
    removeAuthor(record);
  }
  return record.join('\t') + '\n';
});
// Wire the pipeline only once the input file has been opened successfully:
// file -> csv parser -> author-stripping transformer -> stdout.
readStream.on('open', function () {
  readStream.pipe(parser).pipe(transformer).pipe(process.stdout);
});

// A read stream has no end() method, so calling it from here would throw a
// TypeError and hide the real failure. Report the error on stderr (stdout
// carries the data) and make the process exit with a non-zero status.
readStream.on('error', function (err) {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Back to this. I updated my gist following the @rpfigueira comment.
Before:
and now:
Thanks!