Created
December 28, 2011 05:35
-
-
Save sattybhens/1526562 to your computer and use it in GitHub Desktop.
discover unique excel formula's using JRuby and POI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given an Excel file... iterate over each cell that has a formula | |
# and collect all the unique formula signatures | |
# formula signature are in RPN and shown as | |
# => Ref#UnaryMinus#Ref#Multiply#Int#Int#Power#Divide#Ref#Multiply | |
# => Ref#Ref#Multiply#Ref#Ref#Multiply#Subtract#Parenthesis#Ref#Multiply#Int#Int#Power#Divide#Ref#Subtract | |
# => Ref#Ref#Multiply#Ref#Ref#Multiply#Subtract#Parenthesis#Ref#Multiply#Int#Int#Power#Divide#Ref#Multiply#Ref#Subtract | |
# => Ref#Ref#Multiply#Int#Int#Power#Divide | |
# => Ref#Area#Attr#Ref#Multiply#Ref#Multiply#Parenthesis#Divide | |
# => Ref#Area#Attr#Ref#Multiply#Int#Int#Power#Divide#Ref#Multiply#Parenthesis#Divide | |
# => Ref#Area#Attr#Ref#Multiply#Int#Int#Power#Divide#Parenthesis#Divide | |
require 'rubygems' | |
require 'poi' | |
CELL_TYPE_FORMULA = 2 | |
RE = /formula\.([\D]+)[\S]*Ptg/ | |
# test cases | |
# org.apache.poi.hssf.record.formula.RefPtg [D466] => Ref | |
# org.apache.poi.hssf.record.formula.FuncVarPtg [CONCATENATE nArgs=13] => FuncVar | |
# org.apache.poi.hssf.record.formula.StringPtg ["'"] => String | |
# org.apache.poi.hssf.record.formula.Ref3DPtg [sheetIx=1 ! E65] => Ref | |
# org.apache.poi.hssf.record.formula.IntPtg [100] => Int | |
# class org.apache.poi.hssf.record.formula.AddPtg => Add | |
workbook = POI::Workbook.open('B.xlsx') | |
evaluation_workbook = org.apache.poi.xssf.usermodel.XSSFEvaluationWorkbook.create workbook.poi_workbook | |
sheet_count = workbook.number_of_sheets | |
(0...sheet_count).each do |sheet_index| | |
sheet = workbook.sheet_at(sheet_index) | |
# puts sheet.methods.sort | |
puts "-" * 50 | |
puts "#{sheet.sheet_name}" | |
# puts "last_row_num: #{sheet.get_last_row_num}" | |
formulas = {} | |
row_count = sheet.get_last_row_num | |
(0...row_count).each do |row_num| | |
row = sheet.row(row_num) | |
next if row.nil? | |
cell_start = row.get_first_cell_num | |
cell_end = row.get_last_cell_num | |
# puts "#{row_num} : #{cell_start} -> #{cell_end}" | |
# puts row.methods.sort.inspect if row_num == 0 | |
(cell_start...cell_end).each do |column| | |
cell = row.cell(column) | |
unless cell.nil? | |
cell_type = cell.get_cell_type | |
if (cell_type == CELL_TYPE_FORMULA) | |
begin | |
parts = org.apache.poi.ss.formula.FormulaParser.parse(cell.to_s, evaluation_workbook, 0 , sheet_index) | |
f = parts.collect do |ptg| | |
ptg.to_s.match(RE)[1] | |
end | |
formula_key = f.join('#') | |
formulas[formula_key] = cell.to_s unless formulas.has_key?(formula_key) | |
rescue NativeException => e | |
puts "Don't understand => #{cell}" | |
end | |
end | |
end | |
end | |
end | |
puts "************** : #{formulas.size} unique formulas on this sheet" | |
formulas.each { |key, value| puts "#{key}" } | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment