Skip to content

Instantly share code, notes, and snippets.

@antunderwood
Created November 5, 2009 16:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save antunderwood/227160 to your computer and use it in GitHub Desktop.
Save antunderwood/227160 to your computer and use it in GitHub Desktop.
# Author:: Anthony Underwood
# Date:: 2008-07-04
require 'rubygems'
require 'bio'
# Method to perform a remote blast
# input:: a sequence object , a string specifiying the program (blastn, blastp, blastx etc), a remote database (e.g nr-nt for non-redundant) and options (e.g -e 0.0001)
# output:: a Bio::Blast::Report object
def remote_blast(seq_obj, program, db = 'nr-nt', options = '', server = 'genomenet')
# create BLAST factory object
factory = Bio::Blast.remote(program, db, options, server)
report = factory.query(seq_obj)
end
# Method to perform a local blast
# input:: a sequence object, path to database and a string specifiying the program (blastn, blastp, blastx etc)
# output:: a Bio::Blast::Report object
def local_blast(seq_obj, path_to_blast_database, program = 'blastn')
factory = Bio::Blast.local(program, path_to_blast_database, '-m 8')
report = factory.query(seq_obj)
end
# Method to return details of the first hit from a blast search
# input:: a Bio::Blast::Report object
# output:: the hit defition, overlap and percent identity
def get_first_hit_details(report)
return get_hit_details(report, 1)
end
# Method to return details of the x hit from a blast search
# input:: a Bio::Blast::Report object
# output:: the hit defition, overlap and percent identity
def get_hit_details(report, hit_number)
hit = report.hits[hit_number-1]
if !hit.nil?
return hit.definition, hit.overlap, hit.percent_identity
else
return nil, nil, nil
end
end
module Bio
class Blast
def exec_ncbi(query)
host = "www.ncbi.nlm.nih.gov"
path = "/blast/Blast.cgi"
matrix = @matrix ? @matrix : 'blosum62'
filter = @filter ? @filter : 'L'
opt = []
opt.concat(@options) if @options
submit_params = {
'CMD' => 'Put',
'FORMAT_OBJECT' => 'Alignment',
'COMPOSITION_BASED_STATISTICS' => 'off',
'DATABASE' => @db,
'EXPECT' => '1e-3',
'MATRIX_NAME' => matrix,
'FILTER' => filter,
'PROGRAM' => @program,
'SERVICE' => 'plain',
'QUERY' => CGI.escape(query),
'OTHER_ADVANCED' => CGI.escape(Bio::Command.make_command_line_unix(opt)),
}
retrieve_params = {
'CMD' => 'Get',
'DESCRIPTIONS' => 100,
'FORMAT_TYPE' => 'Text',
'ALIGNMENTS' => 50,
'ALIGNMENT_VIEW' => 'Tabular'
}
data = []
submit_params.each do |k, v|
data.push("#{k}=#{v}") if v
end
report = nil
# begin
success = false
while !success
http = Bio::Command.new_http(host)
http.open_timeout = 300
http.read_timeout = 600
result, = http.post(path, data.join('&'))
output = result.body
# find RID
rid =""
output.each do |line|
if line =~ /RID = (\w+)\n$/
rid = $1
break
end
end
retrieve_param_data = []
retrieve_params.each do |k, v|
retrieve_param_data.push("#{k}=#{v}") if v
end
# push on RID value
retrieve_param_data.push("RID=#{rid}")
status = "WAITING"
# puts rid
# retrieve result
until status !~ /WAITING/
sleep 5
result, = http.post(path, retrieve_param_data.join('&'))
output = result.body
status = ""
output.each do |line|
if line =~ /Status=(.+)\n$/
status = $1
break
end
end
status = "NO STATUS" if status == ""
# puts status
end
# if READY reponse parse result and return report
if status == "READY"
raw_txt = output.split(/\<\/?PRE\>/)[1]
tab_format =""
raw_txt.each do |line|
tab_format += line unless line =~ /^#/ or line !~ /\t/# remove blank and commented lines
end
success = true
# else
# raise "There was an error obtaining the result. Status: #{status}"
end
end
return tab_format
end
end
end
require 'rubygems'
require 'bio'
require 'blast_functions'
require 'ncbi_blast'
ENV['http_proxy'] = "http://proxy_server_ip:port_numer"
neuraminidase_sequence = "ATGAATCCAAATCAGAAAATAATAACCATTGGATCAATCTGTCTGGTAGTCGGACTAATTAGCCTAATATTGCAAATAGGGAATATAATCTCAATATGGATTAGCCATTCAATTCAAACTGGAAGTCAAAACCATACTGGAATATGCAACCAAAACATCATTACCTATAAAAATAGCACCTGGGTAAAGGACACAACTTCAGTGATATTAACCGGCAATTCATCTCTTTGTCCCATCCGTGGGTGGGCTATATACAGCAAAGACAATAGCATAAGAATTGGTTCCAAAGGAGACGTTTTTGTCATAAGAGAGCCCTTTATTTCATGTTCTCACTTGGAATGCAGGACCTTTTTTCTGACCCAAGGTGCCTTACTGAATGACAGGCATTCAAATGGGACTGTTAAGGACAGAAGCCCTTATAGGGCCTTAATGAGCTGCCCTGTCGGTGAAGCTCCGTCCCCGTACAATTCAAGATTTGAATCGGTTGCTTGGTCAGCAAGTGCATGTCATGATGGCATGGGCTGGCTAACAATCGGAATTTCAGGTCCAGATAATGGAGCAGTGGCTGTATTAAAATACAACGGCATAATAACTGAAACCATAAAAAGTTGGAGGAAGAAAATATTGAGGACACAAGAGTCTGAATGTGCCTGTGTAAATGGTTCATGTTTTACTATAATGACTGATGGCCCGAGTGATGGGCTGGCCTCGTACAAAATTTTCAAGATCGAAAAGGGGAAGGTTACTAAATCAATAGAGTTGAATGCACCTAATTCTCACTATGAGGAATGTTCCTGTTACCCTGATACCGGCAAAGTGATGTGTGTGTGCAGAGACAATTGGCATGGTTCGAACCGGCCATGGGTGTCTTTCGATCAAAACCTGGATTATCAAATAGGATACATCTGCAGTGGGGTTTTCGGTGACAACCCGCGTCCCAAAGATGGAACAGGCAGCTGTGGTCCAGTGTATGTTGATGGAGCAAACGGAGTAAAGGGATTTTCATATAGGTATGGTAATGGTGTTTGGATAGGAAGGACCAAAAGTCACAGTTCCAGACATGGGTTTGAGATGATTTGGGATCCTAATGGATGGACAGAGACTGATAGTAAGTTCTCTGTGAGGCAAGATGTTGTGGCAATGACTGATTGGTCAGGGTATAGCGGGAGTTTCGTTCAACATCCTGAGCTAACAGGGCTAGACTGTATAAGGCCGTGCTTCTGGGTTGAATTAATCAGGGGACGACCTAAAGAAAAAACAATCTGGACTAGTGCGAGCAGCATTTCTTTTTGTGGCGTGAATAGTGATACTGTAGATTGGTCTTGGCCAGACGGTGCTGAGTTGCCATTCACCATTGACAAGTAG"
blast_report = remote_blast(neuraminidase_sequence, 'blastn', 'nr' , '' , 'ncbi')
(1..5).each do |hit_number|
definition, overlap, percent_identity = get_hit_details(blast_report, hit_number)
if !definition.nil?
accession = definition.split("|")[3]
accession.sub!(/\..+$/, "") # remove version number
server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
embl_text = server.fetch('embl', accession)
embl_object = Bio::EMBL.new(embl_text)
puts "\thit_number #{hit_number} --> " + embl_object.description
else
puts "\thit_number #{hit_number} --> none"
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment