Skip to content

Instantly share code, notes, and snippets.

@cboursnell
Created November 7, 2014 13:58
Show Gist options
  • Save cboursnell/52a35fd4b62654630443 to your computer and use it in GitHub Desktop.
Save cboursnell/52a35fd4b62654630443 to your computer and use it in GitHub Desktop.
simulate reads from a reference with specified coverage
#!/usr/bin/env ruby
# simulate paired end reads from a reference
require 'trollop'
require 'rubystats'
ARGV[0] = "--help" if ARGV.length() == 0
opts = Trollop::options do
banner <<-EOS
Ruby Read Simulator
by Chris Boursnell
DESCRIPTION:
Simulate transcriptomic reads from a reference
USAGE:
ruby simulate_reads.rb [options]
OPTIONS:
EOS
opt :fasta, "fasta file to sequence from",
:type => String, :required => true
opt :expression, "file containing expression levels for each transcript",
:type => String, :required => true
opt :reads, "number of reads to generate",
:type => :int, :default => 10
opt :output, "prefix for output fastq files",
:type => String, :default => "reads"
end
fragment_size = 250
sd = 50
read_length = 100
Trollop::die "#{opts.fasta} doesn't exist" if !File.exist?(opts.fasta)
Trollop::die "#{opts.expression} doesn't exist" if !File.exist?(opts.expression)
expression = {}
File.open("#{opts.expression}").each_line do |line|
cols = line.chomp.split("\t")
expression[cols[0]] = cols[1].to_i
end
seq = {}
key = nil
File.open("#{opts.fasta}").each_line do |line|
if line =~ />(\S+)/
key = $1
seq[key] = ""
else
seq[key] << line.chomp
end
end
library = []
expression.each do |key, count|
count.times do
library << key
end
end
dist = Rubystats::NormalDistribution.new(fragment_size, sd)
reads = opts.reads
count=0
File.open("#{opts.output}_1.fastq", "w") do |left|
File.open("#{opts.output}_2.fastq", "w") do |right|
reads.times do
key = library.sample
fragment = dist.rng.round(0)
fragment = read_length+1 if fragment <= read_length
fragment = 350 if fragment > 350
transcript = seq[key]
len = transcript.length
pos = (rand * (len - fragment + 1)).round(0)
bases = transcript[pos..pos+fragment-1]
puts "key: #{key}\tpos: #{pos}"
puts "fragment: #{fragment}"
puts "bases: #{bases.length}"
# read 1
left.puts "@read:#{count}:#{key}/1"
left.puts bases[0..read_length-1]
left.puts "+"
left.puts "I"*read_length
# read 2
right.puts "@read:#{count}:#{key}/2"
right.puts bases[-read_length..-1].tr("ACGT", "TGCA").reverse
right.puts "+"
right.puts "I"*read_length
count+=1
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment