Skip to content

Instantly share code, notes, and snippets.

@mumriks mumriks/xlsxgrep.rb
Last active Aug 29, 2015

Embed
What would you like to do?
エクセルグレップ(2007 以降用) 要:RubyZip
# encoding: utf-8
#
# エクセル2007 以降のファイル内を検索する
# 2014/07/28, 2014/07/30
require 'rexml/document'
require 'rubygems'
require 'zip/filesystem'
class Sheet
attr_accessor :id, :name, :path
end
def usage
print "usage: ruby xlsxgrep.rb <検索文字列> files...\n"
exit(1)
end
def find_it(name, cell, value)
puts "SheetName: #{name} Cell-Position: #{cell}"
puts "\t#{value}"
return false
end
usage if ARGV.size < 2
pat = Regexp.new(ARGV.shift.encode("UTF-8").upcase)
ARGV.each {|xlsx|
if File.directory?(xlsx)
next
elsif "" == File.extname(xlsx)
xlsx = "#{xlsx}.xlsx"
elsif ".xlsx" != File.extname(xlsx)
next
end
if !File.exist?(xlsx)
puts "'#{xlsx}' is not found."
next
end
xlsx = File.expand_path(xlsx).gsub(/\//s, "\\")
puts "file:#{xlsx}"
notfound = true
Zip::File.open(xlsx) {|zip|
sheets = []
sheets_path_list = []
main_file = ''
shared_string_file = ''
type_xml = REXML::Document.new(zip.file.read("[Content_Types].xml"))
type_xml.elements['Types'].each_element {|override|
if "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" == override.attributes['ContentType']
main_file = override.attributes['PartName']
elsif "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml" == override.attributes['ContentType']
shared_string_file = override.attributes['PartName']
elsif "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" == override.attributes['ContentType']
sheets_path_list << override.attributes['PartName']
end
}
book_xml = REXML::Document.new(zip.file.read(main_file))
book_xml.elements['workbook/sheets/'].each_element {|s|
sheet = Sheet.new
sheet.id = s.attributes['sheetId'].to_i
sheet.name = s.attributes['name']
sheets << sheet
}
sheets.each {|s|
sheets_path_list.each {|sl|
if s.id == sl.slice(/\d+/).to_i
s.path = sl
break
end
}
}
sheets = sheets.sort_by{|s| s.id}
strings_xml = REXML::Document.new(zip.file.read(shared_string_file))
strs = []
strings_xml.elements['sst'].each_element {|si|
si.each_element {|t| strs << t.text}
}
sheets.each {|sheet|
sheet_xml = REXML::Document.new(zip.file.read(sheet.path))
sheet_xml.elements['worksheet/sheetData/'].each_element {|row|
row.each_element {|c|
cell_position = c.attributes['r']
value = ''
if "n" == c.attributes['t']
value = c.elements['v'].text
elsif "s" == c.attributes['t']
value = strs[c.elements['v'].text.to_i]
end
if pat =~ value
notfound = find_it(sheet.name, cell_position, value)
end
if c.elements['f']
if pat =~ c.elements['f'].text
notfound = find_it(sheet.name, cell_position, c.elements['f'].text)
end
end
}
}
}
}
puts 'Not Found.' if notfound
puts
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.