Skip to content

Instantly share code, notes, and snippets.

@stockhuman
Created August 3, 2019 21:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stockhuman/cbdcf5c655abe87a660782a3add74d53 to your computer and use it in GitHub Desktop.
Save stockhuman/cbdcf5c655abe87a660782a3add74d53 to your computer and use it in GitHub Desktop.
// install dependencies such as this with 'npm install xlsx'
const xlsx = require('xlsx')
const { gray, cyan, red, yellow, blue, black, green } = require('ansi-colors')
/**
* USAGE
*
* node filter.js <searchterm> <file to search> <data file>
*/
const query = process.argv[2] // the term we're looking for
const qFile = process.argv[3] // declares the file we're extracting terms from
const dFile = process.argv[4] // the file we will search in with results from query
const log = msg => {
console.log(black.bgWhite('[LOG]') + ' ' + gray(msg))
}
const success = msg => {
log(green(msg))
}
const createSpreadSheet = (data, name) => {
const outSheet = xlsx.utils.json_to_sheet(data)
// create workbook and export
const wb = xlsx.utils.book_new();
xlsx.utils.book_append_sheet(wb, outSheet, name);
xlsx.writeFile(wb, `./${name}.xlsx`)
success(`${name}.xlsx written to disk`)
}
if (process.argv[2] == 'help') {
log(`${cyan('USAGE:')}`)
log(gray(`node filter.js ${yellow('<searchterm>')} ${blue('<file to search>')} ${red('<data file>')}`))
log(`${cyan('EX:')} node filter.js ${yellow('ABC')} ${blue('DMP.xlsx')} ${red('10kMethSorted.xlsx')}`)
process.exit(0)
}
// feedback
log('Looking for ' + blue(query))
let results = []
let now = new Date().toLocaleTimeString('fr-ca').replace(' ', '').replace(/:/g, '')
let output = `${query}_${now}`
const wkbk = xlsx.readFile(qFile, { raw: true })
wkbk.SheetNames.forEach(sheetName => { // in each sheet...
// convert sheet into JSON rows, this is our row array
const rows = xlsx.utils
.sheet_to_row_object_array(wkbk.Sheets[sheetName])
.filter(row =>
row['UCSC_RefGene_Name'] &&
typeof row['UCSC_RefGene_Name'] == 'string' // filters out bad data
)
// A row looks like this
//{
// Name: 'cg18638434',
// UCSC_RefGene_Name: 'C14orf45;ENTPD5',
// Relation_to_Island: 'S_Shore',
// logFC: 4.13212463502205
// },
// for each row, alias the data as 'patient'
rows.forEach(row => {
const { Name, Relation_to_Island, logFC, UCSC_RefGene_Name } = row
try {
if (UCSC_RefGene_Name.split(';').includes(query)) {
results.push({
Name,
Relation_to_Island,
UCSC_RefGene_Name,
logFC
})
}
} catch (error) {
log(`${red('Malformed Data:')}`)
console.log(row)
}
})
// save results
if (results.length > 0) {
log(`Found ${yellow(results.length)} results`)
createSpreadSheet(results, output)
}
else { log('No results found'); process.exit(0) }
})
// read the file we're looking into
const wkbkdata = xlsx.readFile(dFile)
log('Loaded data file')
wkbkdata.SheetNames.forEach(sheetName => {
const rows = xlsx.utils.sheet_to_row_object_array(wkbkdata.Sheets[sheetName])
const out = []
rows.forEach(row => {
for (let index = 0; index < results.length; index++) {
if (results[index].Name === row.Name) {
out.push(row)
}
}
})
log(`Found ${yellow(out.length)} matching CGRP rows`)
if (out.length > 0) {
const e = xlsx.readFile(`./${output}.xlsx`, { raw: true })
xlsx.utils.book_append_sheet(e, xlsx.utils.json_to_sheet(out), 'Matches')
xlsx.writeFile(e, `./${output}.xlsx`)
success('Data written to ' + output)
}
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment