Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mickey24/964593 to your computer and use it in GitHub Desktop.
Save mickey24/964593 to your computer and use it in GitHub Desktop.
Forked on a whim.
# -*- coding: utf-8 -*-
require "open-uri"
require "json"
require "pp"
###
# Search parameters for the DRASearch query.
# Both values are written in URL query form (note the "+" between words)
# because they are interpolated as-is into the request URL.
###
study_type = "Transcriptome+Analysis"
organism = "Homo+sapiens"
###
# Scrapes DDBJ DRASearch and counts samples per sample description.
#
# Reads the first `pages` result pages of the study query (requested with
# show=100) for the given organism and study type, follows every study link
# to collect its sample links, then reads the "Description" table cell of
# each distinct sample page.
#
# Side effects: writes two JSON files into the current directory, where "+"
# in the arguments is replaced by "_" to build the file names:
#   ./<organism>_<study_type>_sample_id.json   - array of sample links, in
#                                                page order, duplicates kept
#   ./<organism>_<study_type>_srs_vs_desc.json - hash of sample link => description
#
# namamono - organism in URL query form, e.g. "Homo+sapiens"
# sakusen  - study type in URL query form, e.g. "Transcriptome+Analysis"
# pages    - number of result pages to read (default 2)
#
# Returns an array of [description, sample_count] pairs sorted by count
# descending, then by description ascending. Samples whose page has no
# Description cell are counted under "no description".
# Network errors raised by open-uri are not rescued here.
###
def count_samples(namamono, sakusen, pages = 2)
  mishima = "http://trace.ddbj.nig.ac.jp/DRASearch/"
  query = "#{mishima}query?organism=#{namamono}&study_type=#{sakusen}&show=100"

  # Page 1 is the bare query; every later page appends "&page=N".
  # [^"]* keeps each match inside a single href even when several links
  # share one line of HTML.
  full_list = (1..pages).flat_map do |page|
    url = page == 1 ? query : "#{query}&page=#{page}"
    URI(url).read.scan(/href="(study\?acc=[^"]*)" target/).flatten
  end

  # Each study page is downloaded exactly once; the resulting list feeds both
  # the JSON dump and the description lookup below.
  sample_id_full_list = full_list.flat_map do |study_url|
    URI("#{mishima}#{study_url}").read.scan(/href="(sample\?acc=[^"]*)" target/).flatten
  end

  n = namamono.gsub("+", "_")
  s = sakusen.gsub("+", "_")
  File.open("./#{n}_#{s}_sample_id.json", "w") { |f| JSON.dump(sample_id_full_list, f) }

  # key: sample link, value: sample description
  srs_vs_desc = {}
  sample_id_full_list.each do |sample_url|
    # A sample can belong to several studies; one download per sample is enough.
    next if srs_vs_desc.key?(sample_url)

    html = URI("#{mishima}#{sample_url}").read
    # Non-greedy so the capture stops at the first closing </td>.
    srs_vs_desc[sample_url] = html[%r{Description</td><td>(.+?)</td>}, 1] || "no description"
  end
  File.open("./#{n}_#{s}_srs_vs_desc.json", "w") { |f| JSON.dump(srs_vs_desc, f) }

  # key: sample description, value: number of samples carrying it
  desc_vs_numofsrs = Hash.new(0)
  srs_vs_desc.each_value { |desc| desc_vs_numofsrs[desc] += 1 }

  # Most frequent description first; ties broken alphabetically.
  desc_vs_numofsrs.sort_by { |desc, count| [-count, desc] }
end
# Script entry point: runs only when this file is executed directly rather
# than loaded from another file. Pretty-prints whatever count_samples returns
# for the organism / study type configured near the top of the file.
pp count_samples(organism, study_type) if $PROGRAM_NAME == __FILE__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment