Skip to content

Instantly share code, notes, and snippets.

@michaelbarton
Created February 8, 2009 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save michaelbarton/60407 to your computer and use it in GitHub Desktop.
Save michaelbarton/60407 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'datamapper'
require 'bio'
DataMapper.setup(:default,{
:adapter => 'sqlite3',
:database => 'db.sqlite3'
})
class Gene
include DataMapper::Resource
property :id, Serial
property :name, String
property :sequence, Text, :format => /^M[\w\n]+\*$/m
end
class Count
include DataMapper::Resource
property :id, Serial
property :acid, String
property :count, Integer
end
task :environment do
require 'database'
require 'logger'
@log = Logger.new('log/analysis.log')
end
namespace :db do
desc 'Create database'
task :create => :environment do
DataMapper.auto_migrate!
end
desc 'Delete database'
task :delete do
system 'rm db.sqlite3'
end
end
namespace :load do
desc 'Loads protein sequences'
task :proteins => :environment do
Gene.all.destroy!
Zlib::GzipReader.open('data/cerevisiae.fasta.gz') do |file|
Bio::FlatFile.auto(file).each do |entry|
gene = Gene.create(
:name => entry.definition.split(' ').first,
:sequence => entry.data.strip
)
@log.warn("#{gene.name} not included") unless gene.valid?
end
end
@log.info "#{Gene.count}"
end
end
namespace :analyse do
desc 'Count amino acids in proteins'
task :count_acids => :environment do
Count.all.destroy!
# Count amino acids across all proteins
counts = Gene.all.inject(Hash.new) do |hash, gene|
composition = Bio::Sequence.auto(gene.sequence).composition
hash.merge!(composition) {|acid,one,two| one + two}
end
@log.info "#{counts['*']} stop codons skipped"
counts.delete('*')
counts.each do |acid,count|
Count.create(:acid => acid, :count => count)
@log.info "#{count} instances of #{acid}"
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment