stockhuman/DMP.js

## DMP.js
// install dependencies such as this with 'npm install xlsx'
const xlsx = require('xlsx')
const { gray, cyan, red, yellow, blue, black, green } = require('ansi-colors')

/**
 * USAGE
 *
 * node filter.js <searchterm> <file to search> <data file>
 */
const query = process.argv[2] // the term we're looking for
const qFile = process.argv[3] // declares the file we're extracting terms from
const dFile = process.argv[4] // the file we will search in with results from query

const log = msg => {
	console.log(black.bgWhite('[LOG]') + ' ' + gray(msg))
}

const success = msg => {
	log(green(msg))
}

const createSpreadSheet = (data, name) => {
	const outSheet = xlsx.utils.json_to_sheet(data)

	// create workbook and export
	const wb = xlsx.utils.book_new();

	xlsx.utils.book_append_sheet(wb, outSheet, name);
	xlsx.writeFile(wb, `./${name}.xlsx`)
	success(`${name}.xlsx written to disk`)
}

if (process.argv[2] == 'help') {
	log(`${cyan('USAGE:')}`)
	log(gray(`node filter.js ${yellow('<searchterm>')} ${blue('<file to search>')} ${red('<data file>')}`))
	log(`${cyan('EX:')} node filter.js ${yellow('ABC')} ${blue('DMP.xlsx')} ${red('10kMethSorted.xlsx')}`)
	process.exit(0)
}

// feedback
log('Looking for ' + blue(query))

let results = []
let now = new Date().toLocaleTimeString('fr-ca').replace(' ', '').replace(/:/g, '')
let output = `${query}_${now}`

const wkbk = xlsx.readFile(qFile, { raw: true })

wkbk.SheetNames.forEach(sheetName => { // in each sheet...
	// convert sheet into JSON rows, this is our row array
	const rows = xlsx.utils
		.sheet_to_row_object_array(wkbk.Sheets[sheetName])
		.filter(row =>
			row['UCSC_RefGene_Name'] &&
			typeof row['UCSC_RefGene_Name'] == 'string' // filters out bad data
		)

	// A row looks like this
	//{
  //   Name: 'cg18638434',
  //   UCSC_RefGene_Name: 'C14orf45;ENTPD5',
  //   Relation_to_Island: 'S_Shore',
  //   logFC: 4.13212463502205
	// },

	// for each row, alias the data as 'patient'
	rows.forEach(row => {
		const { Name, Relation_to_Island, logFC, UCSC_RefGene_Name } = row
		try {
			if (UCSC_RefGene_Name.split(';').includes(query)) {
				results.push({
					Name,
					Relation_to_Island,
					UCSC_RefGene_Name,
					logFC
				})
			}
		} catch (error) {
			log(`${red('Malformed Data:')}`)
			console.log(row)
		}
	})

	// save results
	if (results.length > 0) {
		log(`Found ${yellow(results.length)} results`)
		createSpreadSheet(results, output)
	}
	else { log('No results found'); process.exit(0) }
})

// read the file we're looking into
const wkbkdata = xlsx.readFile(dFile)
log('Loaded data file')

wkbkdata.SheetNames.forEach(sheetName => {

	const rows = xlsx.utils.sheet_to_row_object_array(wkbkdata.Sheets[sheetName])
	const out = []

	rows.forEach(row => {
		for (let index = 0; index < results.length; index++) {
			if (results[index].Name === row.Name) {
				out.push(row)
			}
		}
	})

	log(`Found ${yellow(out.length)} matching CGRP rows`)
	if (out.length > 0) {
		const e = xlsx.readFile(`./${output}.xlsx`, { raw: true })
		xlsx.utils.book_append_sheet(e, xlsx.utils.json_to_sheet(out), 'Matches')
		xlsx.writeFile(e, `./${output}.xlsx`)
		success('Data written to ' + output)
	}
})
	// install dependencies such as this with 'npm install xlsx'
	const xlsx = require('xlsx')
	const { gray, cyan, red, yellow, blue, black, green } = require('ansi-colors')

	/**
	* USAGE
	*
	* node filter.js <searchterm> <file to search> <data file>
	*/
	const query = process.argv[2] // the term we're looking for
	const qFile = process.argv[3] // declares the file we're extracting terms from
	const dFile = process.argv[4] // the file we will search in with results from query

	const log = msg => {
	console.log(black.bgWhite('[LOG]') + ' ' + gray(msg))
	}

	const success = msg => {
	log(green(msg))
	}

	const createSpreadSheet = (data, name) => {
	const outSheet = xlsx.utils.json_to_sheet(data)

	// create workbook and export
	const wb = xlsx.utils.book_new();

	xlsx.utils.book_append_sheet(wb, outSheet, name);
	xlsx.writeFile(wb, `./${name}.xlsx`)
	success(`${name}.xlsx written to disk`)
	}

	if (process.argv[2] == 'help') {
	log(`${cyan('USAGE:')}`)
	log(gray(`node filter.js ${yellow('<searchterm>')} ${blue('<file to search>')} ${red('<data file>')}`))
	log(`${cyan('EX:')} node filter.js ${yellow('ABC')} ${blue('DMP.xlsx')} ${red('10kMethSorted.xlsx')}`)
	process.exit(0)
	}

	// feedback
	log('Looking for ' + blue(query))

	let results = []
	let now = new Date().toLocaleTimeString('fr-ca').replace(' ', '').replace(/:/g, '')
	let output = `${query}_${now}`

	const wkbk = xlsx.readFile(qFile, { raw: true })

	wkbk.SheetNames.forEach(sheetName => { // in each sheet...
	// convert sheet into JSON rows, this is our row array
	const rows = xlsx.utils
	.sheet_to_row_object_array(wkbk.Sheets[sheetName])
	.filter(row =>
	row['UCSC_RefGene_Name'] &&
	typeof row['UCSC_RefGene_Name'] == 'string' // filters out bad data
	)

	// A row looks like this
	//{
	// Name: 'cg18638434',
	// UCSC_RefGene_Name: 'C14orf45;ENTPD5',
	// Relation_to_Island: 'S_Shore',
	// logFC: 4.13212463502205
	// },

	// for each row, alias the data as 'patient'
	rows.forEach(row => {
	const { Name, Relation_to_Island, logFC, UCSC_RefGene_Name } = row
	try {
	if (UCSC_RefGene_Name.split(';').includes(query)) {
	results.push({
	Name,
	Relation_to_Island,
	UCSC_RefGene_Name,
	logFC
	})
	}
	} catch (error) {
	log(`${red('Malformed Data:')}`)
	console.log(row)
	}
	})

	// save results
	if (results.length > 0) {
	log(`Found ${yellow(results.length)} results`)
	createSpreadSheet(results, output)
	}
	else { log('No results found'); process.exit(0) }
	})

	// read the file we're looking into
	const wkbkdata = xlsx.readFile(dFile)
	log('Loaded data file')

	wkbkdata.SheetNames.forEach(sheetName => {

	const rows = xlsx.utils.sheet_to_row_object_array(wkbkdata.Sheets[sheetName])
	const out = []

	rows.forEach(row => {
	for (let index = 0; index < results.length; index++) {
	if (results[index].Name === row.Name) {
	out.push(row)
	}
	}
	})

	log(`Found ${yellow(out.length)} matching CGRP rows`)
	if (out.length > 0) {
	const e = xlsx.readFile(`./${output}.xlsx`, { raw: true })
	xlsx.utils.book_append_sheet(e, xlsx.utils.json_to_sheet(out), 'Matches')
	xlsx.writeFile(e, `./${output}.xlsx`)
	success('Data written to ' + output)
	}
	})