Last active
August 24, 2016 11:02
-
-
Save uludag/102eca4e96caba32b90bc731227f81a4 to your computer and use it in GitHub Desktop.
Example Node.js script to submit NCBI-BLAST sequence similarity searches to NCBI compute farms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Example Node.js script to submit NCBI-BLAST sequence similarity searches | |
to NCBI compute farms. | |
https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=DeveloperInfo | |
1.) Do not contact the server more often than once every three seconds. | |
2.) Do not poll for any single RID more often than once a minute. | |
3.) Use the URL parameter email, and tool, so that we can track your project | |
and contact you if there is a problem. | |
4.) Run scripts on weekends or between 9 pm and 5 am Eastern Time on weekdays
if more than 50 searches will be submitted.
Before running this script install required node.js libraries: | |
> npm install node-rest-client fasta-parser minimist
(querystring and fs are Node.js built-in modules and need no installation.)
Example command line to run: | |
> node ncbi-sss-client.js\ | |
--qseqsfile=your/query/sequences.fasta\ | |
--email='your@email'\ | |
--database='nr'\ | |
--program='blastp' | |
TODO: | |
- Single search for multiple queries | |
- Support for optional NCBI-BLAST parameters | |
This script was developed at King Abdullah University of Science and Technology, | |
Thuwal, SA. | |
*/ | |
// Node.js built-in modules.
var fs = require('fs');
var querystring = require('querystring');

// Third-party modules (see the install note in the header comment).
var Client = require('node-rest-client').Client;
var fasta = require('fasta-parser');
var minimist = require('minimist');

// Shared REST client used for every request sent by this script.
var client = new Client();
// FASTA parser instance used when splitting a multi-sequence input file.
var parser = fasta();
// Command-line options, without the leading "node <script>" entries.
var argv = minimist(process.argv.slice(2));

// Base address and CGI path of the NCBI BLAST endpoint.
var url = 'https://blast.ncbi.nlm.nih.gov';
var path = '/blast/Blast.cgi';
/**
 * Schedule a status check for a submitted search.
 *
 * Logs the job id, then calls status(jobid) once after `waittime` has elapsed.
 *
 * @param {string} jobid - Request id of the search to check.
 * @param {number} waittime - Delay before the check, in milliseconds.
 */
function waitJobCompletion(jobid, waittime) {
    var checkStatus = function () {
        status(jobid);
    };
    console.log("waitJobCompletion: " + jobid);
    setTimeout(checkStatus, waittime);
}
/**
 * Submit a search request to the BLAST server and schedule the first status poll.
 *
 * The reply is scanned for a "RID = <id>" line (the request id) and an optional
 * "RTOE = <seconds>" line (the server's estimated time to completion). When a
 * request id is found, the first poll is scheduled through waitJobCompletion();
 * otherwise the raw reply is logged so the failure can be diagnosed.
 *
 * @param {Object} args - Request arguments for client.post (form data and headers),
 *     as built by getSubmitRequest().
 */
function run(args) {
    // Never poll sooner than one minute after submission (usage guideline 2 in
    // the header comment), even when the server estimates a shorter run time.
    var MIN_WAIT_MS = 60000;

    var req = client.post(url + path, args, function (res) {
        // The payload may be a Buffer or a string; normalise it before matching.
        var text = String(res);
        // Allow any amount of leading indentation before the key.
        var ridMatch = /^[ \t]*RID = (.*)$/m.exec(text);
        var jobid = ridMatch === null ? '' : ridMatch[1].trim();

        if (jobid.length === 0) {
            console.log("No job id:");
            console.log(text);
            return;
        }

        console.log('jobid: ' + jobid);

        // RTOE is reported in seconds; fall back to the minimum when it is absent.
        var rtoeMatch = /^[ \t]*RTOE = (\d+)/m.exec(text);
        var estimateMs = rtoeMatch === null ? 0 : parseInt(rtoeMatch[1], 10) * 1000;
        waitJobCompletion(jobid, Math.max(MIN_WAIT_MS, estimateMs));
    });

    req.on('requestTimeout', function (timedOutReq) {
        console.log('request has expired');
        timedOutReq.abort();
    });
    req.on('responseTimeout', function () {
        console.log('response has expired');
    });
    req.on('error', function (err) {
        console.log('similarity search request failed', err);
    });
}
/**
 * Build the query string used to ask the server for the state of a search.
 *
 * NOTE: the `email` parameter is not included here; it was commented out in the
 * original code and only the `tool` identifier is sent.
 *
 * @param {string} jobid - Request id of the search.
 * @returns {string} Query string starting with "?", ready to append to the endpoint URL.
 */
function getStatusRequest(jobid) {
    return "?"
        + 'tool=ncbi-sss-nodesj-client'
        + '&CMD=Get&FORMAT_OBJECT=SearchInfo'
        // Encode the id so unexpected characters cannot break the query string.
        + '&RID=' + encodeURIComponent(jobid);
}
/**
 * Ask the server for the state of a search and act on the answer.
 *
 * - READY with hits: download the results via result(jobid, 'JSON2_S').
 * - READY without hits: report that nothing was found.
 * - WAITING: poll again in one minute via waitJobCompletion().
 * - UNKNOWN: report that the search expired.
 * - FAILED: report that the search failed.
 * - anything else: log the raw reply instead of silently stopping.
 *
 * @param {string} jobid - Request id of the search.
 */
function status(jobid) {
    var req = client.get(url + path + getStatusRequest(jobid), {}, function (res) {
        var r = res.toString();
        console.log('status: ' + jobid);

        if (/\s+Status=READY/.test(r)) {
            if (/\s+ThereAreHits=yes/.test(r)) {
                console.log("Search complete, retrieving results...\n");
                result(jobid, 'JSON2_S');
            } else {
                console.log("No hits found.\n");
            }
        } else if (/\s+Status=WAITING/.test(r)) {
            // One minute between polls of the same request id (guideline 2 in the header).
            waitJobCompletion(jobid, 60000);
        } else if (/\s+Status=UNKNOWN/.test(r)) {
            console.log('Search ' + jobid + ' expired.');
        } else if (/\s+Status=FAILED/.test(r)) {
            console.log('Search ' + jobid + ' failed.');
        } else {
            // Without this branch the script would end with no hint of what went wrong.
            console.log('Unexpected status reply for ' + jobid + ':');
            console.log(r);
        }
    });

    req.on('error', function (err) {
        console.log('get status request failed', err);
    });
}
/**
 * Build the query string used to download the results of a finished search.
 *
 * @param {string} jobid - Request id of the search.
 * @param {string} format - Value for the FORMAT_TYPE parameter (status() passes 'JSON2_S').
 * @returns {string} Query string starting with "?", ready to append to the endpoint URL.
 */
function getResultRequest(jobid, format) {
    // RID is sent once (it used to be appended twice) and both caller-supplied
    // values are URL-encoded so they cannot break the query string.
    return "?"
        + 'RESULTS_FILE=on'
        + '&RID=' + encodeURIComponent(jobid)
        + '&FORMAT_TYPE=' + encodeURIComponent(format)
        + '&CMD=Get&FORMAT_OBJECT=Alignment';
}
/**
 * Download the results of a finished search and save them to "<jobid>.json"
 * in the current working directory.
 *
 * @param {string} jobid - Request id of the search.
 * @param {string} resulttype - Result format passed on to getResultRequest().
 */
function result(jobid, resulttype) {
    var rreq = url + path + getResultRequest(jobid, resulttype);
    console.log(rreq);

    var req = client.get(rreq, {}, function (res) {
        var content;
        if (Buffer.isBuffer(res) || typeof res === 'string') {
            // Unparsed payload: write it as received. JSON.stringify on a Buffer
            // would store {"type":"Buffer","data":[...]} instead of the document.
            content = res;
        } else {
            // Already-parsed payload: pretty-print it, tab-indented.
            content = JSON.stringify(res, null, '\t');
        }

        console.log('writing results for job ' + jobid);
        try {
            fs.writeFileSync(jobid + ".json", content);
        } catch (err) {
            // Report the failure instead of throwing out of the response callback.
            console.log('writing results for job ' + jobid + ' failed', err);
        }
    });

    req.on('error', function (err) {
        console.log('get result request failed', err);
    });
}
/**
 * Read a query file and submit its whole content, trimmed of surrounding
 * whitespace, as a single search request.
 *
 * @param {string} file - Path of the query sequences file (read as UTF-8).
 */
function readInputFastaFileAndSubmitSingleRequest(file) {
    var query = fs.readFileSync(file, 'utf8').trim();
    console.log(query);
    run(getSubmitRequest(query));
}
/**
 * Split the input file into individual sequences and submit them as separate
 * search requests.
 *
 * NOTE: only the FIRST record is submitted; every later record emitted by the
 * parser is skipped. Submitting more would need pacing of at least three
 * seconds between requests (guideline 1 in the header comment).
 *
 * The shared module-level `parser` is written to and ended here, so this
 * function is meant to be called once per run.
 *
 * @param {string} file - Path of the query sequences file (read as UTF-8).
 */
function readInputFastaFileAndSubmitSeparateRequests(file) {
    // All working variables are locals; they used to be implicit globals.
    var content = fs.readFileSync(file, 'utf8');
    var recordsSeen = 0;

    parser.on('data', function (data) {
        recordsSeen += 1;
        if (recordsSeen >= 2) {
            return;
        }
        // Each parser record arrives as JSON text with `id` and `seq` fields.
        var record = JSON.parse(data.toString());
        var seq = ">" + record.id + "\n" + record.seq;
        console.log(seq);
        run(getSubmitRequest(seq));
    });

    parser.write(content);
    parser.end();
}
/**
 * Build the request arguments for submitting a search (CMD=Put).
 *
 * The form fields are taken from the query text and from the command-line
 * options `--email`, `--database` and `--program`. A `tool` identifier is sent
 * as well, matching the one used by the status request, because the usage
 * guidelines in the header comment ask for both `email` and `tool`.
 *
 * @param {string} seq - Query sequence text to search with.
 * @returns {{data: string, headers: Object}} URL-encoded form body plus the
 *     matching Content-Type and Content-Length headers.
 */
function getSubmitRequest(seq) {
    var data = querystring.stringify({
        'QUERY': seq,
        'email': argv.email,
        'tool': 'ncbi-sss-nodesj-client',
        'DATABASE': argv.database,
        'PROGRAM': argv.program,
        'CMD': 'Put'
    });

    return {
        data: data,
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            // Byte length, not character count, so multi-byte characters are counted correctly.
            'Content-Length': Buffer.byteLength(data)
        }
    };
}
// Entry point: submit the file named by --qseqsfile as one search, or explain
// that the option is missing.
if (argv.qseqsfile === undefined) {
    console.log("No query sequences file specified");
} else {
    // To submit the sequences one by one instead, call
    // readInputFastaFileAndSubmitSeparateRequests(argv.qseqsfile) here.
    readInputFastaFileAndSubmitSingleRequest(argv.qseqsfile);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment