Skip to content

Instantly share code, notes, and snippets.

@chaserx
Created September 2, 2009 04:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chaserx/179550 to your computer and use it in GitHub Desktop.
Save chaserx/179550 to your computer and use it in GitHub Desktop.
desc "yank out the UniGene number from the records downloaded in the ncbi task"
task :uniyank do
  # Scans every ./ncbigp/*.xml file line by line and writes each line that
  # mentions an "Hs." identifier to unigene.txt (truncated on every run), with
  # the surrounding <Object-id_str> tags and leading whitespace removed.

  # Deliberately loose: stricter patterns (/Hs.[0-9]{6}/, /Hs.[0-9A-Z]{6}/)
  # were tried earlier and abandoned in favour of matching the bare prefix.
  unigene_pattern = /Hs\./
  # Opening or closing <Object-id_str> wrapper tag.
  tag_pattern = %r{</?Object-id_str>}

  # Block forms of File.open / File.foreach close their handles even when an
  # exception interrupts processing.
  File.open("unigene.txt", "w+") do |outfile|
    Dir.glob("./ncbigp/*.xml") do |path|
      File.foreach(path) do |line|
        next unless line =~ unigene_pattern

        # Non-destructive gsub: gsub! returns nil when no tag is present, which
        # used to blow up on matching lines that lack the wrapper tags.
        outfile << line.gsub(tag_pattern, "").lstrip
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment