Skip to content

Instantly share code, notes, and snippets.

@ejain
Last active January 27, 2016 17:59
Show Gist options
  • Save ejain/24197316610c70f66a60 to your computer and use it in GitHub Desktop.
Save ejain/24197316610c70f66a60 to your computer and use it in GitHub Desktop.
Converts Second Genome data files to a uBiome taxonomy file.
dnOTU_15 AV15-1068 16733
dnOTU_16 AV15-1068 20403
dnOTU_17 AV15-1068 32939
"use strict";
var csv = require("ya-csv");
// Entry point: load the taxonomy table, aggregate the sample counts read from
// stdin per taxon, convert them to the uBiome JSON layout and print the result.
loadTaxa("stooldb_taxonomy_v1.0.tsv")
  .then(taxa => processSamples(process.openStdin(), taxa))
  .then(toUBiome)
  .then(printJson)
  .catch(err => {
    // Without a terminal handler a failure in any stage would surface only as
    // an unhandled rejection; report it and signal failure to the caller.
    console.error(err);
    process.exitCode = 1;
  });
/**
 * Loads a tab-separated taxonomy file into a lookup table.
 *
 * The file's first line is used as the column header. The id of each row is
 * read from the column whose header is the empty string; every other column
 * becomes a property of the taxon, keyed by the lower-cased header name.
 * A leading "<char>__" prefix (e.g. "k__") is stripped from each value, and
 * values equal to "unclassified" are omitted.
 *
 * @param {string} path - Path of the taxonomy file to read.
 * @returns {Promise<Object<string, Object<string, string>>>} Resolves with the
 *   id -> taxon table once the reader emits "end"; rejects if the reader
 *   emits "error".
 */
function loadTaxa(path) {
  return new Promise((resolve, reject) => {
    const taxa = {};
    const reader = csv.createCsvFileReader(path, { columnsFromHeader : true, "separator" : "\t" });
    reader.on("data", (row) => {
      const id = row[""];
      const taxon = {};
      for (const field of Object.keys(row)) {
        const raw = row[field];
        // Skip the id column (empty header) and cells that are missing, e.g.
        // when a row has fewer columns than the header.
        if (!field || typeof raw !== "string") {
          continue;
        }
        const value = raw.replace(/^.__/, "");
        if (value !== "unclassified") {
          taxon[field.toLowerCase()] = value;
        }
      }
      taxa[id] = taxon;
    });
    // Surface reader failures through the promise instead of leaving the
    // "error" event unhandled.
    reader.on("error", reject);
    reader.on("end", () => {
      resolve(taxa);
    });
  });
}
/**
 * Aggregates per-OTU counts from a tab-separated stream into per-taxon totals.
 *
 * Each row is read positionally: column 0 is the id looked up in `taxa` and
 * column 2 is the count (converted with Number). For every rank/name pair of
 * the matching taxon the count is added to the entry keyed "rank:name".
 * Rows whose id has no entry in `taxa` contribute nothing.
 *
 * @param {Object} stream - Readable stream handed to the CSV stream reader.
 * @param {Object<string, Object<string, string>>} taxa - id -> taxon table.
 * @returns {Promise<Map<string, number>>} Resolves with the "rank:name" ->
 *   summed count map once the reader emits "end"; rejects if the reader
 *   emits "error".
 */
function processSamples(stream, taxa) {
  return new Promise((resolve, reject) => {
    const samples = new Map();
    // Read from the stream the caller supplied rather than re-opening stdin,
    // so the `stream` argument is actually honoured.
    const reader = csv.createCsvStreamReader(stream, { "separator" : "\t" });
    reader.on("data", (row) => {
      const id = row[0];
      const count = Number(row[2]);
      const taxon = taxa[id];
      if (!taxon) {
        return;
      }
      for (const rank of Object.keys(taxon)) {
        const key = rank + ":" + taxon[rank];
        samples.set(key, (samples.get(key) || 0) + count);
      }
    });
    // Surface reader failures through the promise instead of leaving the
    // "error" event unhandled.
    reader.on("error", reject);
    reader.on("end", () => {
      resolve(samples);
    });
  });
}
/**
 * Converts aggregated "rank:name" counts into the uBiome JSON structure.
 *
 * Each entry becomes an object with the raw `count`, a `count_norm` scaled so
 * that the largest count in the map corresponds to 1,000,000, and the
 * `tax_rank` / `tax_name` taken from either side of the first ":" in the key.
 *
 * @param {Map<string, number>} samples - "rank:name" -> count map.
 * @returns {Promise<{ubiome_bacteriacounts: Array<Object>}>} Resolved promise
 *   holding the converted records in map iteration order.
 */
function toUBiome(samples) {
  const counts = [];
  // Fold instead of Math.max.apply so very large maps cannot exceed the
  // engine's argument-count limit.
  let total = -Infinity;
  for (const value of samples.values()) {
    total = Math.max(total, value);
  }
  for (const [key, value] of samples) {
    // Split on the first ":" only, so names that themselves contain a colon
    // are kept intact.
    const sep = key.indexOf(":");
    const rank = sep === -1 ? key : key.slice(0, sep);
    const name = sep === -1 ? undefined : key.slice(sep + 1);
    counts.push({
      "count" : value,
      // A zero maximum would otherwise produce NaN (0 / 0).
      "count_norm" : total === 0 ? 0 : Math.round(1000000 * value / total),
      "tax_rank" : rank,
      "tax_name" : name
    });
  }
  return Promise.resolve({ "ubiome_bacteriacounts" : counts });
}
/**
 * Writes a value to standard output as JSON, pretty-printed with a
 * four-space indent.
 *
 * @param {*} value - Value to serialize.
 */
function printJson(value) {
  const indentWidth = 4;
  const serialized = JSON.stringify(value, null, indentWidth);
  console.log(serialized);
}
Kingdom Phylum Class Order Family Genus Species
dnOTU_15 k__Bacteria p__Firmicutes c__Clostridia o__Clostridiales f__Ruminococcaceae g__unclassified s__unclassified
dnOTU_16 k__Bacteria p__Firmicutes c__Erysipelotrichi o__Erysipelotrichales f__Erysipelotrichaceae g__unclassified s__unclassified
dnOTU_17 k__Bacteria p__Firmicutes c__Clostridia o__Clostridiales f__Lachnospiraceae g__unclassified s__unclassified
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment