Skip to content

Instantly share code, notes, and snippets.

@IsmailM
Last active August 29, 2015 14:19
Show Gist options
  • Save IsmailM/5d855eb6be080ef70c0f to your computer and use it in GitHub Desktop.
Save IsmailM/5d855eb6be080ef70c0f to your computer and use it in GitHub Desktop.
Find the ORF coverage
require 'bio'
def analyse_input_file(input_file)
overall_results = []
biofastafile = Bio::FlatFile.open(Bio::FastaFormat, input_file)
biofastafile.each_entry do |entry|
overall_results << analyse_orfs(entry.entry_id, entry.naseq)
end
overall_results
end
def analyse_orfs(query_id, seq)
results = {}
orfs = get_orfs(seq)
longest_orf = orfs.sort_by { |_key, hash| hash[:coverage] }.last
longest_orf_frame = longest_orf[1][:frame]
coverage = longest_orf[1][:coverage]
translated_length = longest_orf[1][:translated_length]
results[query_id] = {coverage: coverage,
longest_orf_frame: longest_orf_frame,
translated_length: translated_length}
overall_results
end
def get_orfs(seq, min_length)
result = {}
key = 0
min_length = min_length.to_i
(1..6).each do |f|
s = seq.translate(f)
s.scan(/(\w{#{min_length},})/) do |_orf|
orf_start = $~.offset(0)[0] + 1
orf_end = $~.offset(0)[1] + 1
coverage = (((orf_end - orf_start) / s.length.to_f) * 100).ceil
translated_length = s.length
key += 1
result[key] = { frame: f,
orf_start: orf_start,
orf_end: orf_end,
coverage: coverage,
translated_length: translated_length }
end
end
result
end
input_file = ARGV[0]
puts analyse_input_file(input_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment