Skip to content

Instantly share code, notes, and snippets.

@lmrodriguezr
Created January 26, 2018 16:59
Show Gist options
  • Save lmrodriguezr/f2a40415a0ee6b396dd27d7c111f97d0 to your computer and use it in GitHub Desktop.
Save lmrodriguezr/f2a40415a0ee6b396dd27d7c111f97d0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
class MultiGeneBlast
attr :file, :query, :genes, :hits
def initialize(file)
@file = file
@genes = MultiGeneBlast::Set.new
@hits = MultiGeneBlast::Set.new
load_from_file!
end
def load_from_file!
File.open(file, "r") do |fh|
load_header(fh)
load_hits(fh)
load_details(fh)
end
end
private
def load_header(fh)
while !fh.eof?
ln = fh.gets.chomp
case ln
when '', /\ATable of genes, .*:/ # Do nothing
when /\ASignificant hits:/ ; break
when /\AClusterBlast scores for (.*)/
@query = $1
else
@genes << MultiGeneBlast::Gene.new(ln)
end
end
end
def load_hits(fh)
while !fh.eof?
case fh.gets.chomp
when '' # Do nothing
when /\ADetails:/ ; break
when /\A(\d+)\. (\S+)\s+(.*)\./
@hits << MultiGeneBlast::Hit.new($1, $2, $3)
end
end
end
def load_details(fh)
while !fh.eof?
hit = load_details_header(fh)
load_details_genes(fh, hit)
load_details_hits(fh, hit)
end
end
def load_details_header(fh)
h = nil
while !fh.eof?
case fh.gets.chomp
when '', '>>' # Do nothing
when /\ATable of genes, .*:/ ; break
when /\A(\d+)\. (.*)/
h = @hits[$1.to_i]
raise "Bad name: got #{$2}, expected #{h.gene}" if h.gene!=$2
when /\ASource: (.*)\./
raise "Bad source: got #{$1}, expected #{h.source}" if h.source!=$1
when /\ANumber .* hits to this cluster: (\d+)/
h.proteins_with_hits = $1.to_i
when /\AMultiGeneBlast score: ([\d\.]+)/
h.score = $1.to_f
when /\ACumulative Blast bit score: ([\d\.]+)/
h.cumulative_score = $1.to_f
end
end
h
end
def load_details_genes(fh, hit)
while !fh.eof?
ln = fh.gets.chomp
case ln
when '' # Do nothing
when /\ATable of Blast hits .*:/ ; break
else
hit.genes << MultiGeneBlast::Gene.new(ln)
end
end
end
def load_details_hits(fh, hit)
while !fh.eof?
ln = fh.gets.chomp
case ln
when '' # Do nothing
when '>>' ; break
else
hit.blast_hits << MultiGeneBlast::BlastHit.new(ln)
end
end
end
end
class MultiGeneBlast::Gene
attr :name, :loc_start, :loc_end, :strand, :annotation, :locus_tag
alias index name
def initialize(str)
s = str.split(/\s+/)
[1,2].each{ |i| s[i] = s[i].to_i }
@name, @loc_start, @loc_end, @strand, @annotation, @locus_tag = *s
end
end
class MultiGeneBlast::Hit
attr :rank, :gene, :source
attr_accessor :proteins_with_hits, :score, :cumulative_score
attr_accessor :genes, :blast_hits
alias index rank
def initialize(rank, gene, source)
@rank, @gene, @source = rank.to_i, gene, source
@genes = MultiGeneBlast::Set.new
@blast_hits = MultiGeneBlast::Set.new
end
end
class MultiGeneBlast::BlastHit
attr :query, :subject, :identity, :score, :coverage, :evalue
alias index subject
def initialize(str)
s = str.split(/\s+/)
(2..5).each{ |i| s[i] = s[i].to_f }
@query, @subject, @identity, @score, @coverage, @evalue = *s
end
end
class MultiGeneBlast::Set < Hash
def <<(v) self[v.index]=v ; end
end
# Main:
file = ARGV.shift
unless file
$stderr.puts "Usage: #{__FILE__} MultiGeneBlast.txt"
exit 0
end
mgb = MultiGeneBlast.new(file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment