Skip to content

Instantly share code, notes, and snippets.

@yagays
Created September 5, 2010 07:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yagays/565830 to your computer and use it in GitHub Desktop.
Save yagays/565830 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require "pp"
def input_marshal_data(path)
a = 0
a_id_list = []
open(path,"rb") {|f|
a = Marshal.load(f)
a.each_key {|key| a_id_list.concat([key]) }
}
return a, a_id_list
end
def start_chr_end(list,id)
h = []
if list.key?(id)
if list[id].size != 1
list_start = list[id].min{|a,b| a[1] <=> b[1] }[1]
list_end = list[id].max{|a,b| a[2] <=> b[2] }[2]
h = [list[id][0][0], list_start, list_end]
else
h = [list[id][0][0], list[id][0][1], list[id][0][2]]
end
end
h
end
def diff(reference, origin, gene, coverage, threshold)
start_stream = nil
end_stream = nil
if reference != [] && origin != [] && reference[0] == origin[0]
if (reference[2].to_i - origin[2].to_i).abs < coverage #end固定
diff = reference[1].to_i - origin[1].to_i
if diff.abs > threshold
start_stream = [gene, diff, cover(reference,origin), reference[1], origin[1]].join("\t")
end
end
if (reference[1].to_i - origin[1].to_i).abs < coverage #start固定
diff = origin[2].to_i - reference[2].to_i
if diff.abs > threshold
end_stream = [gene, diff,cover(reference,origin), reference[2], origin[2]].join("\t")
end
end
end
return start_stream, end_stream
end
def cover(reference, origin)
if reference[2].to_i < origin[2].to_i
right_diff = reference[2].to_i
else
right_diff = origin[2].to_i
end
if reference[1].to_i < origin[1].to_i
left_diff = origin[1].to_i
else
left_diff = reference[1].to_i
end
ratio = (right_diff - left_diff)*100.0/(reference[2].to_i - reference[1].to_i)
return ratio
end
if __FILE__ == $PROGRAM_NAME
# 一方の端を固定するときのcoverage.+n ~ -nまで.
coverage = 100
# もう一方の端のうち,どの程度以上をmis-annotatedとするか.+n.
threshold = 100
# data_dirの指定
b_path = ARGV[0] || "/Users/yag/ngs/marshal_dump/b_fpkm_sorted_tmap_translated_marshal.dump"
l_path = ARGV[1] || "/Users/yag/ngs/marshal_dump/l_fpkm_sorted_tmap_translated_marshal.dump"
m_path = ARGV[2] || "/Users/yag/ngs/marshal_dump/m_fpkm_sorted_tmap_translated_marshal.dump"
r_path = ARGV[3] || "/Users/yag/ngs/marshal_dump/refseq_biomart/refseq_biomart_marshal.dump"
# 読み込み
b, b_id_list = input_marshal_data(b_path)
l, l_id_list = input_marshal_data(l_path)
m, m_id_list = input_marshal_data(m_path)
ref, ref_id_list = input_marshal_data(r_path)
id_list_all = b_id_list + l_id_list + m_id_list
id_list = id_list_all.uniq
name_blm = ["b","l","m"]
[b,l,m].each_with_index do |tissue,n| # each tissue
output_start = []
output_end = []
id_list.each do |gene| # each gene
reference = ref[gene]
origin = start_chr_end(tissue,gene)
start_tmp, end_tmp = diff(reference, origin, gene, coverage, threshold)
output_start << start_tmp if start_tmp != nil
output_end << end_tmp if end_tmp != nil
end
open("#{name_blm[n]}_start.txt","w"){|f|
f.puts output_start
}
open("#{name_blm[n]}_end.txt","w"){|f|
f.puts output_end
}
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment