sld/ROSALIND-20.rb

## ROSALIND-20.rb
require 'set'
require_relative 'graph_and_ant_colony' # https://gist.github.com/sld/2711080

class String
  def indexes letter
    (0 .. self.length - 1).find_all { |i| self[i,1] == letter }
  end
end


class Array
  def swap ind1, ind2
    tmp = self[ind1]
    self[ind1] = self[ind2]
    self[ind2] = tmp
  end
end

# Task names starts by " Task: TASK_NAME"
# Example: "# Task: Computing GC Content "
module Rosalind20
  def count_dna_nucliotides( dna_string )
    {'A' => dna_string.count('A'), 'C' => dna_string.count('C'), 'G' => dna_string.count('G'), 'T' => dna_string.count('T')}
  end


  def dna_to_rna( dna_string )
    dna_string.tr "T", "U"
  end


  def reverse_complement( dna_string )
    dna_string.reverse.tr("A","Z").tr("C","Q").tr("T","A").tr("G","C").tr("Q","G").tr("Z","T")
  end


  def parse_fasta_file(filename)
    fasta_pairs = {}
    fasta_id = nil
    File.open(filename).each_line do |line|
      if line[0] == '>'
        fasta_id = line.chomp
        fasta_id[0] = ""
        fasta_pairs[fasta_id] = ""
      else
        fasta_pairs[fasta_id] += line.chomp
      end
    end
    return fasta_pairs
  end


  def parse_adjacency_list_file filename
    graph_data = {:nodes_count => nil, :adjacency_list => []}
    File.open(filename).each_line do |line|
      splitted_line = line.split
      if splitted_line.count == 2
        graph_data[:adjacency_list] << [splitted_line[0].to_i, splitted_line[1].to_i]
      else
        graph_data[:nodes_count] = splitted_line.first.to_i
      end
    end
    return graph_data
  end


  def parse_set_file filename
    lines = File.open(filename).lines.to_a

    universum_set = (1..lines[0].to_i).to_a

    lines[1].delete! "{}"
    lines[2].delete! "{}"
    set1 = lines[1].split(",").map{|e| e.to_i}
    set2 = lines[2].split(",").map{|e| e.to_i}

    return [universum_set.to_set, set1.to_set, set2.to_set]
  end


  def calc_gc(dna_string)
    100.0 * ( dna_string.count('G') + dna_string.count('C') ) / dna_string.length
  end


  # Task: Computing GC Content
  def find_max_gc( fasta_pairs )
    max_fasta_pair = fasta_pairs.max_by{ |fasta_id, dna_string| calc_gc(dna_string) }
    return [max_fasta_pair[0], calc_gc(max_fasta_pair[1])]
  end


  def get_substring_data dna_string, substring_length, substring_ind
    start_ind = substring_ind
    end_ind = start_ind + substring_length
    {:string => dna_string[start_ind...end_ind], :start_ind => start_ind + 1}
  end


  # Task: Finding a Motif in DNA
  def get_substring_start_indexes( dna_string, substring )
    indexes = []
    legal_substring_count = dna_string.length - substring.length
    for i in 0...legal_substring_count
      substring_data = get_substring_data(dna_string, substring.length, i)
      substring_data[:string] == substring ? indexes << substring_data[:start_ind] : nil
    end
    return indexes
  end


  # Task: A Brief Introduction to Graph Theory
  def make_directed_graph fasta_pairs, k
    graph = []
    fasta_pairs.each do |top_fasta_id, top_dna_string|
      fasta_pairs.each do |fasta_id, dna_string|
        if top_dna_string[-k..-1] == dna_string[0...k] && top_fasta_id != fasta_id
          graph << [top_fasta_id, fasta_id]
        end
      end
    end

    return graph
  end


  # ("AGACCTGCCG", "CCTGCCGGAA") => CCTGCCG
  def find_start_substring( src_str, dst_str )
    start_substring = ""
    indexes = dst_str.indexes(src_str[-1]).reverse
    indexes.each do |ind|
      if src_str[-1-ind..-1] == dst_str[0..ind]
        start_substring = dst_str[0..ind]
        break
      end
    end

    return start_substring
  end


  def make_overlap_graph(fasta_pairs)
    overlap_graph = []
    strings_arr = fasta_pairs.values

    for i in 0...strings_arr.length
      for j in i+1...strings_arr.length
        if strings_arr[i][strings_arr[j]] || strings_arr[j][strings_arr[i]]
          next
        else
          start_substring = find_start_substring( strings_arr[i], strings_arr[j] )
          end_substring = find_start_substring( strings_arr[j], strings_arr[i] )
          overlap_graph << {:src => strings_arr[i], :dst => strings_arr[j], :to => start_substring, :from => end_substring}
        end
      end
    end

    return overlap_graph
  end


  def get_edges_from_overlap_graph overlap_graph
    nodes = Set.new
    edges = Set.new
    overlap_graph.each do |edge|
      node_src = nodes.find{|e| e.name == edge[:src]} || Node.new(edge[:src])
      node_dst = nodes.find{|e| e.name == edge[:dst]} || Node.new(edge[:dst])
      nodes << node_dst
      nodes << node_src
      if edge[:to].length > 0
        new_edge = Edge.new(node_src, node_dst, -edge[:to].length)
        edges << new_edge if !edges.find{|e| e == new_edge}
      end

      if edge[:from].length > 0
        new_edge = Edge.new(node_dst, node_src, -edge[:from].length)
        edges << new_edge if !edges.find{|e| e == new_edge}
      end
    end

    return edges
  end


  def get_superstring_from_route route
    superstring = ""
    route.edges.each_with_index do |edge, i|
      src_str = edge.from.name
      dst_str = edge.to.name
      substr_weight = edge.weight
      if i == 0
        superstring = src_str.clone
        superstring[substr_weight..-1] = dst_str
      else
        superstring[substr_weight..-1] = dst_str
      end
    end
    return superstring
  end


  def check_superstring superstring, test_strings
    test_strings.each do|str|
      return "Uncorrect superstring!" if !superstring[str]
    end
    return "Good superstring!"
  end


  # Task: Genome Assembly as Shortest Superstring
  def get_best_route(overlap_graph, need_print = false)
    good_edges = overlap_graph.get_more_than_half_weight_edges
    good_graph = Graph.new good_edges, :bidirectional => false, :sort_edges => true
    superstring = get_superstring_from_route(good_graph)
    p superstring.length
    p superstring if need_print
  end


  # Task: Counting Point Mutations
  def hamming_distance str1, str2
    distance = 0
    for i in 0...str1.length
      if str1[i] != str2[i]
        distance += 1
      end
    end
    return distance
  end


  def get_corrects dna_strings
    appears = {}
    for i in 0...dna_strings.length
      for j in i+1...dna_strings.length
        appears[dna_strings[i]] ||= 0
        if dna_strings[i] == dna_strings[j] || reverse_complement(dna_strings[i]) == dna_strings[j]
          appears[dna_strings[i]] += 1
        end
      end
    end
    appears.find_all{|str, appears_count| appears_count > 0}.collect{|arr| arr[0]}
  end


  def get_corrections(incorrects, corrects)
    corrections = []
    incorrects.each do |incorrect|
      corrects.each do |correct|
        ham_dist = hamming_distance(incorrect, correct)
        if ham_dist == 1
          corrections << [incorrect, correct]
          break
        else
          correct_complement = reverse_complement(correct)
          compl_ham_dist = hamming_distance(incorrect, correct_complement)
          if compl_ham_dist == 1
            corrections << [incorrect, correct_complement]
            break
          end
        end
      end
    end
    return corrections
  end


  # Task: Error Correction in Reads
  def get_corrections_from_strings(dna_strings)
    corrects = get_corrects(dna_strings)
    incorrects = dna_strings - corrects
    corrections = get_corrections(incorrects, corrects)

    corrections.each{ |(incorrect, correction)| puts "#{incorrect}->#{correction}" }
  end


  def codon_table
    codon_table_arr =  "UUU F      CUU L      AUU I      GUU V
                        UUC F      CUC L      AUC I      GUC V
                        UUA L      CUA L      AUA I      GUA V
                        UUG L      CUG L      AUG M      GUG V
                        UCU S      CCU P      ACU T      GCU A
                        UCC S      CCC P      ACC T      GCC A
                        UCA S      CCA P      ACA T      GCA A
                        UCG S      CCG P      ACG T      GCG A
                        UAU Y      CAU H      AAU N      GAU D
                        UAC Y      CAC H      AAC N      GAC D
                        UAA Stop   CAA Q      AAA K      GAA E
                        UAG Stop   CAG Q      AAG K      GAG E
                        UGU C      CGU R      AGU S      GGU G
                        UGC C      CGC R      AGC S      GGC G
                        UGA Stop   CGA R      AGA R      GGA G
                        UGG W      CGG R      AGG R      GGG G".split.each_slice(2).to_a
    codon_table_hash = Hash[codon_table_arr]
    codon_table_hash["UAA"] = ""
    codon_table_hash["UAG"] = ""
    codon_table_hash["UGA"] = ""
    return codon_table_hash
  end


  # Task: Translating RNA into Protein
  def rna_into_protein rna_string
    rna_string.chars.each_slice(3).map(&:join).map{|str3| codon_table[str3]}.join
  end


  def permutations(array, permutations_array, ind, permutation_ind)
    if permutation_ind == array.length-1
      return permutations_array
    else
      for i in ind...array.length
        new_arr = array.clone
        new_arr.swap(ind, i)
        permutations_array << new_arr
        permutations(new_arr, permutations_array, ind+1, permutation_ind+1)
      end
    end
    return permutations_array.uniq
  end


  # Task: RNA Splicing
  def rna_without_introns dna_string, introns
    introns.each{ |intron| dna_string[intron] = "" }
    rna_into_protein( dna_to_rna( dna_string ) )
  end


  # Task: Enumerating Gene Orders
  def permutations_of_length length
    array = (1..length).to_a
    total_length = array.inject(1){|s,e| s*e}
    permutations = permutations(array, [], 0, 0)
    file = File.new 'perm', 'w'
    file.puts "#{total_length}"
    permutations.each{|perm| file.puts perm.join(" ")}
  end


  # Task: Enumerating k-mers Lexicographically
  def k_mers(array, k)
    array.repeated_permutation(k).each{|sub_arr| puts sub_arr.join}
  end


  # Task: k-Mer Composition
  def composition_4mer( dna_string )
    cnt = 4
    permutations = Hash[['A', 'C', 'G', 'T'].repeated_permutation(cnt).map{|e| [e.join, 0]}]
    for i in 0..(dna_string.length - cnt)
      permutations[dna_string[i...i+cnt]] += 1 if permutations[dna_string[i...i+cnt]]
    end
    return permutations.values
  end


  # Task: Inferring mRNA from Protein
  def protein_to_rna_possible_count( protein_string, mod = 1000000 )
    rna_freq_table = {}
    codon_table.values.each do |elem|
      rna_freq_table[elem] ||= 0
      rna_freq_table[elem] += 1
    end

    possible_count = 1
    protein_string.each_char do |char|
      possible_count = rna_freq_table[char] * possible_count
    end
    return ( ( possible_count * rna_freq_table[""] ) % mod )
  end


  # Task: Mendel's First Law
  # k - dominant homo; m - hetero; n - recessive homo
  def dominant_probability( k, m, n)
    population = k + m + n

    (k / population) * ((k-1) / (population - 1) + m / (population - 1) + n / (population - 1)) +
    (m / population) * (k / (population - 1) + 0.75*(m-1) / (population - 1) + 0.5*n / (population - 1)) +
    (n / population) * (k / (population - 1) + 0.5*m / (population - 1))
  end


  # Task: Completing a Tree
  def min_edges_to_be_tree adjacency_list, nodes_count
    nodes_groups = Array.new( nodes_count )
    (1..nodes_count).to_a.each{ |i| nodes_groups[i-1] = [i].to_set }

    adjacency_list.each do |(node_from, node_to)|
      has_group = false
      for i in 0...nodes_groups.length
        node_group = nodes_groups[i]
        if node_group.include?(node_from) || node_group.include?(node_to)
          has_group = true
          node_group << node_from
          node_group << node_to
        end
      end
      nodes_groups << [node_from, node_to].to_set if !has_group
    end

    (1..nodes_count).to_a.each do |node|
      to_merge_groups = []
      nodes_groups.each_with_index do |node_group, i|
        to_merge_groups << i if node_group.include?(node)
      end
      merged_group = [].to_set
      to_merge_groups.each{|ind| merged_group += nodes_groups[ind]; nodes_groups[ind] = nil}
      nodes_groups.compact!
      nodes_groups << merged_group
    end

    return (nodes_groups.count - 1)
  end


  # Task: Counting Subsets
  def subsets_count n
    2**n % 1000000
  end


  # Task: Introduction to Set Operations
  def set_operations set1, set2, universum_set
    {:union => set1 + set2, :intersection => set1 & set2, :difference1 => set1 - set2, :difference2 => set2 - set1,
     :complement1 => universum_set - set1, :complement2 => universum_set - set2}
  end


  # Task: Constructing a De Bruijn Graph
  def make_de_bruijn dna_strings
    for i in 0...dna_strings.length
      dna_strings << reverse_complement(dna_strings[i])
    end
    dna_strings.sort!
    dna_strings = dna_strings.to_set

    k = dna_strings.first.length - 1
    edges = Set.new
    nodes = {}
    dna_strings.each do |dna_str|
      nodes[dna_str[0...k]] ||= Node.new dna_str[0...k]
      nodes[dna_str[1...k+1]] ||= Node.new dna_str[1...k+1]
      edges << Edge.new( nodes[dna_str[0...k]], nodes[dna_str[1...k+1]], 0)
    end
    return edges
  end
end

include Rosalind20

dna_strings = %w(
                  TGAT
                  CATG
                  TCAT
                  ATGC
                  CATC
                  CATC
                )
edges = make_de_bruijn( dna_strings )
edges.each {|edge| puts edge.to_s(:with_brackets => true)}
	require 'set'
	require_relative 'graph_and_ant_colony' # https://gist.github.com/sld/2711080

	class String
	def indexes letter
	(0 .. self.length - 1).find_all { \|i\| self[i,1] == letter }
	end
	end


	class Array
	def swap ind1, ind2
	tmp = self[ind1]
	self[ind1] = self[ind2]
	self[ind2] = tmp
	end
	end

	# Task names starts by " Task: TASK_NAME"
	# Example: "# Task: Computing GC Content "
	module Rosalind20
	def count_dna_nucliotides( dna_string )
	{'A' => dna_string.count('A'), 'C' => dna_string.count('C'), 'G' => dna_string.count('G'), 'T' => dna_string.count('T')}
	end


	def dna_to_rna( dna_string )
	dna_string.tr "T", "U"
	end


	def reverse_complement( dna_string )
	dna_string.reverse.tr("A","Z").tr("C","Q").tr("T","A").tr("G","C").tr("Q","G").tr("Z","T")
	end


	def parse_fasta_file(filename)
	fasta_pairs = {}
	fasta_id = nil
	File.open(filename).each_line do \|line\|
	if line[0] == '>'
	fasta_id = line.chomp
	fasta_id[0] = ""
	fasta_pairs[fasta_id] = ""
	else
	fasta_pairs[fasta_id] += line.chomp
	end
	end
	return fasta_pairs
	end


	def parse_adjacency_list_file filename
	graph_data = {:nodes_count => nil, :adjacency_list => []}
	File.open(filename).each_line do \|line\|
	splitted_line = line.split
	if splitted_line.count == 2
	graph_data[:adjacency_list] << [splitted_line[0].to_i, splitted_line[1].to_i]
	else
	graph_data[:nodes_count] = splitted_line.first.to_i
	end
	end
	return graph_data
	end


	def parse_set_file filename
	lines = File.open(filename).lines.to_a

	universum_set = (1..lines[0].to_i).to_a

	lines[1].delete! "{}"
	lines[2].delete! "{}"
	set1 = lines[1].split(",").map{\|e\| e.to_i}
	set2 = lines[2].split(",").map{\|e\| e.to_i}

	return [universum_set.to_set, set1.to_set, set2.to_set]
	end


	def calc_gc(dna_string)
	100.0 * ( dna_string.count('G') + dna_string.count('C') ) / dna_string.length
	end


	# Task: Computing GC Content
	def find_max_gc( fasta_pairs )
	max_fasta_pair = fasta_pairs.max_by{ \|fasta_id, dna_string\| calc_gc(dna_string) }
	return [max_fasta_pair[0], calc_gc(max_fasta_pair[1])]
	end


	def get_substring_data dna_string, substring_length, substring_ind
	start_ind = substring_ind
	end_ind = start_ind + substring_length
	{:string => dna_string[start_ind...end_ind], :start_ind => start_ind + 1}
	end


	# Task: Finding a Motif in DNA
	def get_substring_start_indexes( dna_string, substring )
	indexes = []
	legal_substring_count = dna_string.length - substring.length
	for i in 0...legal_substring_count
	substring_data = get_substring_data(dna_string, substring.length, i)
	substring_data[:string] == substring ? indexes << substring_data[:start_ind] : nil
	end
	return indexes
	end


	# Task: A Brief Introduction to Graph Theory
	def make_directed_graph fasta_pairs, k
	graph = []
	fasta_pairs.each do \|top_fasta_id, top_dna_string\|
	fasta_pairs.each do \|fasta_id, dna_string\|
	if top_dna_string[-k..-1] == dna_string[0...k] && top_fasta_id != fasta_id
	graph << [top_fasta_id, fasta_id]
	end
	end
	end

	return graph
	end


	# ("AGACCTGCCG", "CCTGCCGGAA") => CCTGCCG
	def find_start_substring( src_str, dst_str )
	start_substring = ""
	indexes = dst_str.indexes(src_str[-1]).reverse
	indexes.each do \|ind\|
	if src_str[-1-ind..-1] == dst_str[0..ind]
	start_substring = dst_str[0..ind]
	break
	end
	end

	return start_substring
	end


	def make_overlap_graph(fasta_pairs)
	overlap_graph = []
	strings_arr = fasta_pairs.values

	for i in 0...strings_arr.length
	for j in i+1...strings_arr.length
	if strings_arr[i][strings_arr[j]] \|\| strings_arr[j][strings_arr[i]]
	next
	else
	start_substring = find_start_substring( strings_arr[i], strings_arr[j] )
	end_substring = find_start_substring( strings_arr[j], strings_arr[i] )
	overlap_graph << {:src => strings_arr[i], :dst => strings_arr[j], :to => start_substring, :from => end_substring}
	end
	end
	end

	return overlap_graph
	end


	def get_edges_from_overlap_graph overlap_graph
	nodes = Set.new
	edges = Set.new
	overlap_graph.each do \|edge\|
	node_src = nodes.find{\|e\| e.name == edge[:src]} \|\| Node.new(edge[:src])
	node_dst = nodes.find{\|e\| e.name == edge[:dst]} \|\| Node.new(edge[:dst])
	nodes << node_dst
	nodes << node_src
	if edge[:to].length > 0
	new_edge = Edge.new(node_src, node_dst, -edge[:to].length)
	edges << new_edge if !edges.find{\|e\| e == new_edge}
	end

	if edge[:from].length > 0
	new_edge = Edge.new(node_dst, node_src, -edge[:from].length)
	edges << new_edge if !edges.find{\|e\| e == new_edge}
	end
	end

	return edges
	end


	def get_superstring_from_route route
	superstring = ""
	route.edges.each_with_index do \|edge, i\|
	src_str = edge.from.name
	dst_str = edge.to.name
	substr_weight = edge.weight
	if i == 0
	superstring = src_str.clone
	superstring[substr_weight..-1] = dst_str
	else
	superstring[substr_weight..-1] = dst_str
	end
	end
	return superstring
	end


	def check_superstring superstring, test_strings
	test_strings.each do\|str\|
	return "Uncorrect superstring!" if !superstring[str]
	end
	return "Good superstring!"
	end


	# Task: Genome Assembly as Shortest Superstring
	def get_best_route(overlap_graph, need_print = false)
	good_edges = overlap_graph.get_more_than_half_weight_edges
	good_graph = Graph.new good_edges, :bidirectional => false, :sort_edges => true
	superstring = get_superstring_from_route(good_graph)
	p superstring.length
	p superstring if need_print
	end


	# Task: Counting Point Mutations
	def hamming_distance str1, str2
	distance = 0
	for i in 0...str1.length
	if str1[i] != str2[i]
	distance += 1
	end
	end
	return distance
	end


	def get_corrects dna_strings
	appears = {}
	for i in 0...dna_strings.length
	for j in i+1...dna_strings.length
	appears[dna_strings[i]] \|\|= 0
	if dna_strings[i] == dna_strings[j] \|\| reverse_complement(dna_strings[i]) == dna_strings[j]
	appears[dna_strings[i]] += 1
	end
	end
	end
	appears.find_all{\|str, appears_count\| appears_count > 0}.collect{\|arr\| arr[0]}
	end


	def get_corrections(incorrects, corrects)
	corrections = []
	incorrects.each do \|incorrect\|
	corrects.each do \|correct\|
	ham_dist = hamming_distance(incorrect, correct)
	if ham_dist == 1
	corrections << [incorrect, correct]
	break
	else
	correct_complement = reverse_complement(correct)
	compl_ham_dist = hamming_distance(incorrect, correct_complement)
	if compl_ham_dist == 1
	corrections << [incorrect, correct_complement]
	break
	end
	end
	end
	end
	return corrections
	end


	# Task: Error Correction in Reads
	def get_corrections_from_strings(dna_strings)
	corrects = get_corrects(dna_strings)
	incorrects = dna_strings - corrects
	corrections = get_corrections(incorrects, corrects)

	corrections.each{ \|(incorrect, correction)\| puts "#{incorrect}->#{correction}" }
	end


	def codon_table
	codon_table_arr = "UUU F CUU L AUU I GUU V
	UUC F CUC L AUC I GUC V
	UUA L CUA L AUA I GUA V
	UUG L CUG L AUG M GUG V
	UCU S CCU P ACU T GCU A
	UCC S CCC P ACC T GCC A
	UCA S CCA P ACA T GCA A
	UCG S CCG P ACG T GCG A
	UAU Y CAU H AAU N GAU D
	UAC Y CAC H AAC N GAC D
	UAA Stop CAA Q AAA K GAA E
	UAG Stop CAG Q AAG K GAG E
	UGU C CGU R AGU S GGU G
	UGC C CGC R AGC S GGC G
	UGA Stop CGA R AGA R GGA G
	UGG W CGG R AGG R GGG G".split.each_slice(2).to_a
	codon_table_hash = Hash[codon_table_arr]
	codon_table_hash["UAA"] = ""
	codon_table_hash["UAG"] = ""
	codon_table_hash["UGA"] = ""
	return codon_table_hash
	end


	# Task: Translating RNA into Protein
	def rna_into_protein rna_string
	rna_string.chars.each_slice(3).map(&:join).map{\|str3\| codon_table[str3]}.join
	end


	def permutations(array, permutations_array, ind, permutation_ind)
	if permutation_ind == array.length-1
	return permutations_array
	else
	for i in ind...array.length
	new_arr = array.clone
	new_arr.swap(ind, i)
	permutations_array << new_arr
	permutations(new_arr, permutations_array, ind+1, permutation_ind+1)
	end
	end
	return permutations_array.uniq
	end


	# Task: RNA Splicing
	def rna_without_introns dna_string, introns
	introns.each{ \|intron\| dna_string[intron] = "" }
	rna_into_protein( dna_to_rna( dna_string ) )
	end


	# Task: Enumerating Gene Orders
	def permutations_of_length length
	array = (1..length).to_a
	total_length = array.inject(1){\|s,e\| s*e}
	permutations = permutations(array, [], 0, 0)
	file = File.new 'perm', 'w'
	file.puts "#{total_length}"
	permutations.each{\|perm\| file.puts perm.join(" ")}
	end


	# Task: Enumerating k-mers Lexicographically
	def k_mers(array, k)
	array.repeated_permutation(k).each{\|sub_arr\| puts sub_arr.join}
	end


	# Task: k-Mer Composition
	def composition_4mer( dna_string )
	cnt = 4
	permutations = Hash[['A', 'C', 'G', 'T'].repeated_permutation(cnt).map{\|e\| [e.join, 0]}]
	for i in 0..(dna_string.length - cnt)
	permutations[dna_string[i...i+cnt]] += 1 if permutations[dna_string[i...i+cnt]]
	end
	return permutations.values
	end


	# Task: Inferring mRNA from Protein
	def protein_to_rna_possible_count( protein_string, mod = 1000000 )
	rna_freq_table = {}
	codon_table.values.each do \|elem\|
	rna_freq_table[elem] \|\|= 0
	rna_freq_table[elem] += 1
	end

	possible_count = 1
	protein_string.each_char do \|char\|
	possible_count = rna_freq_table[char] * possible_count
	end
	return ( ( possible_count * rna_freq_table[""] ) % mod )
	end


	# Task: Mendel's First Law
	# k - dominant homo; m - hetero; n - recessive homo
	def dominant_probability( k, m, n)
	population = k + m + n

	(k / population) * ((k-1) / (population - 1) + m / (population - 1) + n / (population - 1)) +
	(m / population) * (k / (population - 1) + 0.75(m-1) / (population - 1) + 0.5n / (population - 1)) +
	(n / population) * (k / (population - 1) + 0.5*m / (population - 1))
	end


	# Task: Completing a Tree
	def min_edges_to_be_tree adjacency_list, nodes_count
	nodes_groups = Array.new( nodes_count )
	(1..nodes_count).to_a.each{ \|i\| nodes_groups[i-1] = [i].to_set }

	adjacency_list.each do \|(node_from, node_to)\|
	has_group = false
	for i in 0...nodes_groups.length
	node_group = nodes_groups[i]
	if node_group.include?(node_from) \|\| node_group.include?(node_to)
	has_group = true
	node_group << node_from
	node_group << node_to
	end
	end
	nodes_groups << [node_from, node_to].to_set if !has_group
	end

	(1..nodes_count).to_a.each do \|node\|
	to_merge_groups = []
	nodes_groups.each_with_index do \|node_group, i\|
	to_merge_groups << i if node_group.include?(node)
	end
	merged_group = [].to_set
	to_merge_groups.each{\|ind\| merged_group += nodes_groups[ind]; nodes_groups[ind] = nil}
	nodes_groups.compact!
	nodes_groups << merged_group
	end

	return (nodes_groups.count - 1)
	end


	# Task: Counting Subsets
	def subsets_count n
	2**n % 1000000
	end


	# Task: Introduction to Set Operations
	def set_operations set1, set2, universum_set
	{:union => set1 + set2, :intersection => set1 & set2, :difference1 => set1 - set2, :difference2 => set2 - set1,
	:complement1 => universum_set - set1, :complement2 => universum_set - set2}
	end


	# Task: Constructing a De Bruijn Graph
	def make_de_bruijn dna_strings
	for i in 0...dna_strings.length
	dna_strings << reverse_complement(dna_strings[i])
	end
	dna_strings.sort!
	dna_strings = dna_strings.to_set

	k = dna_strings.first.length - 1
	edges = Set.new
	nodes = {}
	dna_strings.each do \|dna_str\|
	nodes[dna_str[0...k]] \|\|= Node.new dna_str[0...k]
	nodes[dna_str[1...k+1]] \|\|= Node.new dna_str[1...k+1]
	edges << Edge.new( nodes[dna_str[0...k]], nodes[dna_str[1...k+1]], 0)
	end
	return edges
	end
	end

	include Rosalind20

	dna_strings = %w(
	TGAT
	CATG
	TCAT
	ATGC
	CATC
	CATC
	)
	edges = make_de_bruijn( dna_strings )
	edges.each {\|edge\| puts edge.to_s(:with_brackets => true)}