Skip to content

Instantly share code, notes, and snippets.

@CharlesOkwuagwu
Last active August 12, 2017 05:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CharlesOkwuagwu/de07f9d3d7dbaad4da198b52d792ea48 to your computer and use it in GitHub Desktop.
Save CharlesOkwuagwu/de07f9d3d7dbaad4da198b52d792ea48 to your computer and use it in GitHub Desktop.
Compilation error: Elixir 1.5.1, OTP 20, on Windows 10
C:\Elixir\rmas
λ mix compile
==> exoffice
Compiling 2 files (.ex)
== Compilation error in file lib/exoffice/parser/excel_2003/loader.ex ==
** (CaseClauseError) no case clause matching: []
(elixir) src/elixir_bitstring.erl:113: :elixir_bitstring.expand_each_spec/4
(elixir) src/elixir_bitstring.erl:86: :elixir_bitstring.expand_specs/5
(elixir) src/elixir_bitstring.erl:29: :elixir_bitstring.expand/6
(elixir) src/elixir_bitstring.erl:9: :elixir_bitstring.expand/4
could not compile dependency :exoffice, "mix compile" failed. You can recompile this dependency with "mix deps.compile exoffice", update it with "mix deps.update exoffice" or clean it with "mix deps.clean exoffice"
defmodule Exoffice.Parser.Excel2003.Loader do
alias Exoffice.Parser.Excel2003.OLE
alias Exoffice.Parser.Excel2003.Cell
alias Exoffice.Parser.Excel2003.String, as: ExofficeString
alias Exoffice.Parser.Excel2003
use Bitwise, only_operators: true
# ParseXL definitions
# BIFF versions accepted by read_bof/6 for the workbook-globals substream;
# any other version is rejected there as too old.
@xls_biff8 0x0600
@xls_biff7 0x0500
# Substream types read from offset 2 of a BOF record (see read_bof/6).
@xls_workbook_globals 0x0005
@xls_worksheet 0x0010
# Calendar
# Only @calendar_mac_1904 is referenced in this module (read_datemode/3).
@calendar_windows_1900 1900 # Base date of 1st Jan 1900 = 1.0
@calendar_mac_1904 1904 # Base date of 2nd Jan 1904 = 1.0
# record identifiers
# Record codes dispatched on by parse/3 and parse_sheet_part/5;
# @xls_type_continue is used by get_spliced_record_data/6 and read_sst/3.
@xls_type_sheet 0x0085
@xls_type_bof 0x0809
@xls_type_codepage 0x0042
@xls_type_datemode 0x0022
@xls_type_sst 0x00fc
@xls_type_continue 0x003c
@xls_type_labelsst 0x00fd
@xls_type_number 0x0203
@xls_type_blank 0x0201
@xls_type_eof 0x000a
@xls_type_rk 0x027e
@xls_type_mulrk 0x00bd
@xls_type_mulblank 0x00be
# sheet state
# Values stored under :sheet_state of each sheet map built by read_sheet/3.
@sheetstate_visible "visible"
@sheetstate_hidden "hidden"
@sheetstate_veryhidden "veryHidden"
# Streams pulled out of the OLE container by get_stream/1.
# `data` is the stream named by `ole.workbook` and is what parse/3 walks;
# a field stays nil when the container does not name that stream.
defstruct data: nil,
summary_information: nil,
document_summary_information: nil
@doc """
Loads the Excel 2003 workbook stored at `path`.

`sheet` is either `nil` (parse every sheet) or a zero-based index into the
sheets listed in the workbook.

Returns a list of `{:ok, table_id}` tuples, one per parsed worksheet, where
`table_id` is the table obtained from `Xlsxir.StateManager` that holds the
sheet's rows. Returns `{:error, reason}` when the file cannot be read, is not
an OLE container, or any parsing step reports an error.
"""
def load(path, sheet \\ nil) do
  with {:ok, ole_header} <- read_ole_header(path),
       true <- ole_header == OLE.identifier_ole(),
       {:ok, binary} <- File.read(path),
       {:ok, ole} <- OLE.parse_blocks(binary),
       loader <- get_stream(ole),
       {stream, _, excel} <- parse(loader.data, 0, create_excel_2003(loader)) do
    stream
    |> parse_sheets(excel, sheet)
    |> Enum.map(fn {status, tid, _} -> {status, tid} end)
  else
    # the 8-byte signature did not match (this also covers empty/short files)
    false -> {:error, "The file is not recognised as an OLE file"}
    {:error, reason} -> {:error, reason}
  end
end

# Reads the 8-byte file signature. File.open/3 with a function closes the
# handle once the function returns, so no descriptor is leaked.
defp read_ole_header(path) do
  case File.open(path, [:read, :binary], &:file.read(&1, 8)) do
    {:ok, {:ok, header}} -> {:ok, header}
    # an empty file has no signature at all
    {:ok, :eof} -> {:ok, <<>>}
    {:ok, {:error, reason}} -> {:error, reason}
    {:error, reason} -> {:error, reason}
  end
end
# Builds the initial parser state: the size of the workbook stream plus a
# fresh table (requested from Xlsxir.StateManager) for the shared strings.
defp create_excel_2003(loader) do
  stream_size = byte_size(loader.data)
  strings_table = GenServer.call(Xlsxir.StateManager, :new_table)

  %Excel2003{data_size: stream_size, shared_strings_tid: strings_table}
end
# Extracts the workbook, summary and document-summary streams from the parsed
# OLE container. A stream whose property index is nil is left as nil.
defp get_stream(ole) do
  workbook_index = ole.workbook
  summary_index = ole.summary_information
  document_summary_index = ole.document_summary_information

  read_stream = fn
    nil ->
      nil

    prop_index ->
      prop = Enum.at(ole.props, prop_index)
      OLE.get_stream(ole, prop, prop.start_block, "")
  end

  data = read_stream.(workbook_index)
  summary_information = read_stream.(summary_index)
  document_summary_information = read_stream.(document_summary_index)

  %__MODULE__{
    data: data,
    summary_information: summary_information,
    document_summary_information: document_summary_information
  }
end
# Parses the requested sheets (all of them when `sheet` is nil, otherwise the
# one at that index) into freshly created tables, then drops the shared-string
# table. Only entries whose sheet type byte is <<0>> are parsed.
# Returns a list of {:ok, sheet_table, excel} tuples.
defp parse_sheets(stream, %Excel2003{shared_strings_tid: shared_strings_tid} = excel, sheet) do
  selected =
    case sheet do
      nil -> excel.sheets
      index -> [Enum.at(excel.sheets, index)]
    end

  worksheets = Enum.filter(selected, fn %{sheet_type: type} -> type == <<0>> end)

  results =
    for entry <- worksheets do
      table = GenServer.call(Xlsxir.StateManager, :new_table)
      parse_sheet_part(stream, entry.offset, excel, entry.offset_end, table)
      {:ok, table, excel}
    end

  :ets.delete(shared_strings_tid)
  results
end
# Walks the records of one worksheet substream starting at byte offset `pos`,
# dispatching each record code to its reader. Cell readers store their values
# in the `sheet_tid` table and recurse back here.
#
# Returns `{stream, pos, excel}` once an EOF record is reached or fewer than
# five bytes remain after `pos`.
#
# Public only because read_bof/6 re-enters it through `apply/3`.
#
# The record code is read in the body rather than the head: `pos` has to be
# bound before it can be used as an offset, and the modifier is `little`
# (the original `litle` is not a valid bitstring modifier).
def parse_sheet_part(stream, pos, excel, offset_end, sheet_tid)
    when is_integer(pos) and pos >= 0 and byte_size(stream) - 4 > pos do
  <<code::little-16>> = binary_part(stream, pos, 2)
  IO.puts "code: #{inspect code, base: :hex} -> #{get_code(code)}"

  case code do
    @xls_type_bof -> read_bof(stream, pos, excel, offset_end, :parse_sheet_part, sheet_tid)
    @xls_type_labelsst -> read_label_sst(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_number -> read_number(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_rk -> read_rk(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_mulrk -> read_mul_rk(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_blank -> read_blank(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_mulblank -> read_mul_blank(stream, pos, excel, offset_end, sheet_tid)
    @xls_type_eof -> read_eof(stream, pos, excel)
    _ -> read_default(stream, pos, excel, offset_end, sheet_tid)
  end
end

def parse_sheet_part(stream, pos, excel, _, _), do: {stream, pos, excel}
# Walks the workbook stream from byte offset `pos`, dispatching the
# workbook-level records (BOF, SHEET, CODEPAGE, DATEMODE, SST) to their
# readers and skipping every other record.
#
# Returns `{stream, pos, excel}` when fewer than five bytes of the declared
# `data_size` remain (or the stream is too short to hold a record code);
# readers may also return `{:error, reason}`.
#
# The record code is read in the body rather than the head: `pos` has to be
# bound before it can be used as an offset, and the modifier is `little`
# (the original `litle` is not a valid bitstring modifier).
def parse(stream, pos, %Excel2003{data_size: data_size} = excel)
    when is_integer(pos) and pos >= 0 and data_size - 4 > pos and
           byte_size(stream) - 2 >= pos do
  <<code::little-16>> = binary_part(stream, pos, 2)

  case code do
    @xls_type_bof -> read_bof(stream, pos, excel)
    @xls_type_sheet -> read_sheet(stream, pos, excel)
    @xls_type_codepage -> read_codepage(stream, pos, excel)
    @xls_type_datemode -> read_datemode(stream, pos, excel)
    @xls_type_sst -> read_sst(stream, pos, excel)
    _ -> read_default(stream, pos, excel)
  end
end

def parse(stream, pos, excel) do
  {stream, pos, excel}
end
# Skips an uninteresting worksheet record: reads its 2-byte little-endian
# payload size (right after the 2-byte code) and resumes after the payload.
defp read_default(stream, pos, excel, offset_end, sheet_tid) do
  payload_size =
    stream
    |> binary_part(pos + 2, 2)
    |> :binary.decode_unsigned(:little)

  next_pos = pos + 4 + payload_size
  parse_sheet_part(stream, next_pos, excel, offset_end, sheet_tid)
end
# Skips an uninteresting workbook-level record: reads its 2-byte little-endian
# payload size (right after the 2-byte code) and resumes after the payload.
defp read_default(stream, pos, excel) do
  payload_size =
    stream
    |> binary_part(pos + 2, 2)
    |> :binary.decode_unsigned(:little)

  next_pos = pos + 4 + payload_size
  parse(stream, next_pos, excel)
end
# Handles a BOF (beginning of substream) record at `pos`.
#
#   * workbook globals - validates the BIFF version, stores it and continues
#     with parse/3; returns {:error, reason} for unsupported versions
#   * worksheet        - continues with the parser named by `fun`
#   * anything else    - (e.g. a chart) skips the whole substream up to and
#     including its EOF record, then continues with the parser named by `fun`
#
# `fun` is :parse (default) or :parse_sheet_part; `offset_end` and `tid` are
# only forwarded to the latter.
#
# The catch-all branch used to return a bare `excel`, which broke the
# {stream, pos, excel} contract that load/2 matches on and stopped parsing at
# the first chart substream.
defp read_bof(stream, pos, excel, offset_end \\ nil, fun \\ :parse, tid \\ nil) do
  length = OLE.get_int_2d(stream, pos + 2)
  record_data = binary_part(stream, pos + 4, length)
  new_pos = pos + length + 4

  # offset: 2; size: 2; type of the following data
  case OLE.get_int_2d(record_data, 2) do
    @xls_workbook_globals ->
      version = OLE.get_int_2d(record_data, 0)

      if version in [@xls_biff8, @xls_biff7] do
        parse(stream, new_pos, %{excel | version: version})
      else
        {:error, "Cannot read this Excel file. Version is too old."}
      end

    @xls_worksheet ->
      # do not use this version information for anything
      # it is unreliable (OpenOffice doc, 5.8), use only version information from the global stream
      resume_after_bof(fun, stream, new_pos, excel, offset_end, tid)

    _ ->
      # substream, e.g. chart: just skip the entire substream
      after_substream = skip_substream(stream, new_pos)
      resume_after_bof(fun, stream, after_substream, excel, offset_end, tid)
  end
end

# Re-enters the parser that encountered the BOF record.
defp resume_after_bof(:parse, stream, pos, excel, _offset_end, _tid) do
  parse(stream, pos, excel)
end

defp resume_after_bof(fun, stream, pos, excel, offset_end, tid) do
  apply(__MODULE__, fun, [stream, pos, excel, offset_end, tid])
end

# Returns the offset just past the EOF record that closes the substream
# starting at `pos`, or the first offset at which no complete record header
# is left in the stream.
defp skip_substream(stream, pos) when pos + 4 > byte_size(stream), do: pos

defp skip_substream(stream, pos) do
  code = OLE.get_int_2d(stream, pos)
  next_pos = pos + 4 + OLE.get_int_2d(stream, pos + 2)

  if code != @xls_type_eof and next_pos < byte_size(stream) do
    skip_substream(stream, next_pos)
  else
    next_pos
  end
end
# Reads a LABELSST record: a cell whose value is an index into the shared
# string table. The resolved string is appended to the row's cell list in
# `sheet_tid`, then parsing resumes after the record.
defp read_label_sst(stream, pos, %Excel2003{shared_strings_tid: tid} = excel, offset_end, sheet_tid) do
  length = OLE.get_int_2d(stream, pos + 2)
  record_data = binary_part(stream, pos + 4, length)

  # offset: 0; size: 2; index to row (stored zero-based, exposed one-based)
  row = OLE.get_int_2d(record_data, 0) + 1
  # offset: 2; size: 2; index to column
  column_string =
    record_data
    |> OLE.get_int_2d(2)
    |> Cell.string_from_column_index()

  # offset: 6; size: 4; index to SST record
  value = get_shared_string(tid, OLE.get_int_4d(record_data, 6))

  cell_ref = column_string <> to_string(row)
  IO.puts "string: #{inspect {cell_ref, value, length, record_data}}"

  # add cell: append to the cells already stored for this row, if any
  existing =
    case :ets.match(sheet_tid, {row, :"$1"}) do
      [[cells]] -> cells
      _ -> []
    end

  :ets.insert(sheet_tid, {row, existing ++ [[cell_ref, value]]})

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Reads a NUMBER record: row (2 bytes), column (2), XF index (2) and an
# 8-byte IEEE 754 double, all little-endian. The value is appended to the
# row's cell list in `sheet_tid`, then parsing resumes after the record.
#
# The value used to be decoded as a 64-bit integer, which returned the raw
# bit pattern of the double rather than the number; the `litle` modifier typo
# also prevented the module from compiling.
defp read_number(stream, pos, excel, offset_end, sheet_tid) do
  <<length::little-16, row::little-16, column::little-16, _xf::binary-size(2),
    value::little-float-64>> = binary_part(stream, pos + 2, 2 + 6 + 8)

  row = row + 1
  column_string = Cell.string_from_column_index(column)
  IO.puts "number: #{inspect {"#{column_string}#{row}", value}}"

  # add cell
  case :ets.match(sheet_tid, {row, :"$1"}) do
    [[cells]] ->
      :ets.insert(sheet_tid, {row, cells ++ [[column_string <> to_string(row), value]]})

    _ ->
      :ets.insert(sheet_tid, {row, [[column_string <> to_string(row), value]]})
  end

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Reads a MULRK record: one row index, the first column index, then a run of
# 6-byte entries (2-byte XF index + 4-byte RK value), one per consecutive
# column. Each decoded value is appended to the row's cell list in
# `sheet_tid`, then parsing resumes after the record.
#
# Fixes the `litle` modifier typo and guards the loop: `1..0` is a descending
# range in Elixir, so a record without entries used to be read twice out of
# bounds.
defp read_mul_rk(stream, pos, excel, offset_end, sheet_tid) do
  <<length::little-16, row::little-16, first_column::little-16>> =
    binary_part(stream, pos + 2, 2 + 4)

  row = row + 1
  # 6 bytes of the payload are row, first column and last column
  cell_count = div(length - 6, 6)

  if cell_count > 0 do
    Enum.each(1..cell_count, fn col ->
      start = 6 * (col - 1)
      # 4 bytes record header + 4 bytes row/first column precede the entries
      value = extract_rk_number(binary_part(stream, pos + 4 + 4 + start, 6))
      column_string = Cell.string_from_column_index(first_column + (col - 1))
      IO.puts "mul-rk-#{col}: #{inspect {"#{column_string}#{row}", value}}"

      # add cell
      case :ets.match(sheet_tid, {row, :"$1"}) do
        [[cells]] ->
          :ets.insert(sheet_tid, {row, cells ++ [[column_string <> to_string(row), value]]})

        _ ->
          :ets.insert(sheet_tid, {row, [[column_string <> to_string(row), value]]})
      end
    end)
  end

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Reads an RK record: row (2 bytes), column (2), then 6 bytes holding the XF
# index and the packed RK value, which extract_rk_number/1 decodes. The value
# is appended to the row's cell list in `sheet_tid`, then parsing resumes
# after the record.
#
# Fixes the `litle` bitstring modifier typo that prevented compilation.
defp read_rk(stream, pos, excel, offset_end, sheet_tid) do
  <<length::little-16, row::little-16, column::little-16, record_data::binary-size(6)>> =
    binary_part(stream, pos + 2, 2 + 4 + 6)

  row = row + 1
  column_string = Cell.string_from_column_index(column)
  value = extract_rk_number(record_data)
  IO.puts "rk-num: #{inspect {"#{column_string}#{row}", value, length, record_data}}"

  # add cell
  case :ets.match(sheet_tid, {row, :"$1"}) do
    [[cells]] ->
      :ets.insert(sheet_tid, {row, cells ++ [[column_string <> to_string(row), value]]})

    _ ->
      :ets.insert(sheet_tid, {row, [[column_string <> to_string(row), value]]})
  end

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Reads a MULBLANK record: one row index, the first column index, then a run
# of 2-byte XF indexes, one per consecutive blank column. A nil-valued cell is
# appended to the row's cell list in `sheet_tid` for each of them, then
# parsing resumes after the record.
#
# Fixes three defects: the `litle` modifier typo, the missing continuation
# (the function returned the comprehension result, so the rest of the sheet
# was never parsed), and the descending `1..0` range for an empty run.
defp read_mul_blank(stream, pos, excel, offset_end, sheet_tid) do
  <<length::little-16, row::little-16, first_column::little-16>> =
    binary_part(stream, pos + 2, 2 + 4)

  row = row + 1
  # 6 bytes of the payload are row, first column and last column
  cell_count = div(length - 6, 2)

  if cell_count > 0 do
    Enum.each(1..cell_count, fn col ->
      column_string = Cell.string_from_column_index(first_column + (col - 1))

      # add cell
      case :ets.match(sheet_tid, {row, :"$1"}) do
        [[cells]] ->
          :ets.insert(sheet_tid, {row, cells ++ [[column_string <> to_string(row), nil]]})

        _ ->
          :ets.insert(sheet_tid, {row, [[column_string <> to_string(row), nil]]})
      end
    end)
  end

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Reads a BLANK record: row (2 bytes), column (2) and XF index (2). A
# nil-valued cell is appended to the row's cell list in `sheet_tid`, then
# parsing resumes after the record.
#
# Fixes the `litle` modifier typo and advances by the record's declared
# length, like every other reader, instead of a hard-coded 6.
defp read_blank(stream, pos, excel, offset_end, sheet_tid) do
  <<length::little-16, row::little-16, column::little-16, _xf::binary>> =
    binary_part(stream, pos + 2, 2 + 6)

  row = row + 1
  column_string = Cell.string_from_column_index(column)
  IO.puts "blank: #{inspect {"#{column_string}#{row}", nil, length}}"

  # add cell
  case :ets.match(sheet_tid, {row, :"$1"}) do
    [[cells]] ->
      :ets.insert(sheet_tid, {row, cells ++ [[column_string <> to_string(row), nil]]})

    _ ->
      :ets.insert(sheet_tid, {row, [[column_string <> to_string(row), nil]]})
  end

  parse_sheet_part(stream, pos + 4 + length, excel, offset_end, sheet_tid)
end
# Decodes a 6-byte RK entry: a 2-byte XF index followed by a 32-bit
# little-endian RK value laid out as
#
#   bit 0      - 1 when the stored value must be divided by 100
#   bit 1      - 1 when the payload is a signed 30-bit integer,
#                0 when it is the 30 most significant bits of an IEEE 754 double
#   bits 2..31 - payload
#
# Returns an integer or a float (always a float when the /100 flag is set).
#
# The previous clauses matched the flags against the two top bits of the
# first byte; the RK value is little-endian, so those are bits 7 and 6 of
# the low byte rather than bits 0 and 1 of the value. Reading the value as a
# little-endian integer first puts the flags and payload where they belong.
defp extract_rk_number(<<_ixfe::little-16, rk::little-32>>) do
  # re-encoded big-endian, the payload comes first and the two flags last
  <<payload::bitstring-size(30), integer_flag::1, scaled_flag::1>> = <<rk::32>>

  value =
    case integer_flag do
      1 ->
        <<int::signed-30>> = payload
        int

      0 ->
        # the 34 low-order bits of the double are implicitly zero
        <<float::float-64>> = <<payload::bitstring, 0::34>>
        float
    end

  if scaled_flag == 1, do: value / 100, else: value
end
# Rebuilds a floating point number by hand from 8 bytes: the two 32-bit words
# are read with OLE.get_int_4d/2 (high word at offset 4, low word at offset 0)
# and split into sign, 11-bit biased exponent and mantissa. Terms whose power
# of two would exceed 2^1023 are left out.
defp extract_number(data) do
  high_word = OLE.get_int_4d(data, 4)
  low_word = OLE.get_int_4d(data, 0)

  negative? = (high_word &&& 0x80000000) != 0
  exp = ((high_word &&& 0x7ff00000) >>> 20) - 1023
  # upper 20 mantissa bits with the implicit leading 1 restored
  mantissa_high = 0x100000 ||| (high_word &&& 0x000fffff)
  low_top_bit_set? = (low_word &&& 0x80000000) != 0
  mantissa_rest = low_word &&& 0x7fffffff

  from_high =
    if 20 - exp > 1023 do
      0
    else
      mantissa_high / :math.pow(2, 20 - exp)
    end

  with_top_bit =
    if low_top_bit_set? and 21 - exp <= 1023 do
      from_high + 1 / :math.pow(2, 21 - exp)
    else
      from_high
    end

  magnitude =
    if 52 - exp > 1023 do
      with_top_bit
    else
      with_top_bit + mantissa_rest / :math.pow(2, 52 - exp)
    end

  if negative?, do: magnitude * -1, else: magnitude
end
# Reads a CODEPAGE record, stores the resolved code page name in `excel` and
# continues with parse/3. Returns {:error, reason} when the code page
# identifier is unknown or unsupported.
def read_codepage(stream, pos, excel) do
  size = OLE.get_int_2d(stream, pos + 2)
  payload = binary_part(stream, pos + 4, size)

  # offset: 0; size: 2; code page identifier
  identifier = OLE.get_int_2d(payload, 0)

  case codepage_to_name(identifier) do
    {:ok, name} ->
      parse(stream, pos + size + 4, %{excel | codepage: name})

    {:error, _reason} = failure ->
      failure
  end
end
# Maps a numeric code page identifier to its encoding name.
#
# Returns {:ok, name} for supported code pages and {:error, message} for
# unsupported or unknown ones.
#
# The fallback message is built with interpolation: `codepage` is an integer,
# and `<>` only accepts binaries, so the old concatenation raised
# ArgumentError instead of returning the error tuple.
defp codepage_to_name(codepage) do
  case codepage do
    367 -> {:ok, "ASCII"} # ASCII
    437 -> {:ok, "CP437"} # OEM US
    720 -> {:error, "Code page 720 not supported."} # OEM Arabic
    737 -> {:ok, "CP737"} # OEM Greek
    775 -> {:ok, "CP775"} # OEM Baltic
    850 -> {:ok, "CP850"} # OEM Latin I
    852 -> {:ok, "CP852"} # OEM Latin II (Central European)
    855 -> {:ok, "CP855"} # OEM Cyrillic
    857 -> {:ok, "CP857"} # OEM Turkish
    858 -> {:ok, "CP858"} # OEM Multilingual Latin I with Euro
    860 -> {:ok, "CP860"} # OEM Portuguese
    861 -> {:ok, "CP861"} # OEM Icelandic
    862 -> {:ok, "CP862"} # OEM Hebrew
    863 -> {:ok, "CP863"} # OEM Canadian (French)
    864 -> {:ok, "CP864"} # OEM Arabic
    865 -> {:ok, "CP865"} # OEM Nordic
    866 -> {:ok, "CP866"} # OEM Cyrillic (Russian)
    869 -> {:ok, "CP869"} # OEM Greek (Modern)
    874 -> {:ok, "CP874"} # ANSI Thai
    932 -> {:ok, "CP932"} # ANSI Japanese Shift-JIS
    936 -> {:ok, "CP936"} # ANSI Chinese Simplified GBK
    949 -> {:ok, "CP949"} # ANSI Korean (Wansung)
    950 -> {:ok, "CP950"} # ANSI Chinese Traditional BIG5
    1200 -> {:ok, "UTF-16LE"} # UTF-16 (BIFF8)
    1250 -> {:ok, "CP1250"} # ANSI Latin II (Central European)
    1251 -> {:ok, "CP1251"} # ANSI Cyrillic
    0 -> {:ok, "CP1252"} # CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
    1252 -> {:ok, "CP1252"} # ANSI Latin I (BIFF4-BIFF7)
    1253 -> {:ok, "CP1253"} # ANSI Greek
    1254 -> {:ok, "CP1254"} # ANSI Turkish
    1255 -> {:ok, "CP1255"} # ANSI Hebrew
    1256 -> {:ok, "CP1256"} # ANSI Arabic
    1257 -> {:ok, "CP1257"} # ANSI Baltic
    1258 -> {:ok, "CP1258"} # ANSI Vietnamese
    1361 -> {:ok, "CP1361"} # ANSI Korean (Johab)
    10000 -> {:ok, "MAC"} # Apple Roman
    10001 -> {:ok, "CP932"} # Macintosh Japanese
    10002 -> {:ok, "CP950"} # Macintosh Chinese Traditional
    10003 -> {:ok, "CP1361"} # Macintosh Korean
    10006 -> {:ok, "MACGREEK"} # Macintosh Greek
    10007 -> {:ok, "MACCYRILLIC"} # Macintosh Cyrillic
    10008 -> {:ok, "CP936"} # Macintosh - Simplified Chinese (GB 2312)
    10029 -> {:ok, "MACCENTRALEUROPE"} # Macintosh Central Europe
    10079 -> {:ok, "MACICELAND"} # Macintosh Icelandic
    10081 -> {:ok, "MACTURKISH"} # Macintosh Turkish
    21010 -> {:ok, "UTF-16LE"} # UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
    32768 -> {:ok, "MAC"} # Apple Roman
    32769 -> {:error, "Code page 32769 not supported."} # ANSI Latin I (BIFF2-BIFF3)
    65000 -> {:ok, "UTF-7"} # Unicode (UTF-7)
    65001 -> {:ok, "UTF-8"} # Unicode (UTF-8)
    _ -> {:error, "Unknown codepage: #{codepage}"}
  end
end
# Steps over records one at a time until an EOF record is found at `pos` or
# `pos` reaches the declared data size. Returns `excel` unchanged; the final
# position is not reported to the caller.
defp read_bof_default(stream, pos, excel) do
  code = OLE.get_int_2d(stream, pos)
  payload_size = OLE.get_int_2d(stream, pos + 2)

  if code != @xls_type_eof and pos < excel.data_size do
    read_bof_default(stream, pos + payload_size + 4, excel)
  else
    excel
  end
end
# Reads a DATEMODE record and switches the workbook to the 1904 calendar when
# the first payload byte is 1; otherwise `excel` is left untouched. Parsing
# then continues after the record.
defp read_datemode(stream, pos, excel) do
  size = OLE.get_int_2d(stream, pos + 2)
  payload = binary_part(stream, pos + 4, size)

  # offset: 0; size: 2; 0 = base 1900, 1 = base 1904
  updated =
    case binary_part(payload, 0, 1) do
      <<1>> -> %{excel | base_date: @calendar_mac_1904}
      _ -> excel
    end

  parse(stream, pos + size + 4, updated)
end
# Reads a SHEET record and appends a map describing the sheet (name, stream
# offset of its BOF record, visibility state and type byte) to `excel.sheets`,
# then continues with parse/3.
#
# The sheet state used to be read with a zero-length binary_part/3, which
# always yields <<>>, so hidden and very hidden sheets were reported as
# visible. It now reads the single state byte at offset 4.
defp read_sheet(stream, pos, excel) do
  length = OLE.get_int_2d(stream, pos + 2)
  record_data = binary_part(stream, pos + 4, length)

  # offset: 0; size: 4; absolute stream position of the BOF record of the sheet
  # NOTE: not encrypted
  rec_offset = OLE.get_int_4d(stream, pos + 4)

  # offset: 4; size: 1; sheet state
  sheet_state =
    case binary_part(record_data, 4, 1) do
      <<1>> -> @sheetstate_hidden
      <<2>> -> @sheetstate_veryhidden
      _ -> @sheetstate_visible
    end

  # offset: 5; size: 1; sheet type
  sheet_type = binary_part(record_data, 5, 1)

  # offset: 6; size: var; sheet name
  rec_name =
    case excel.version do
      @xls_biff8 ->
        binary_part(record_data, 6, byte_size(record_data) - 6)
        |> ExofficeString.read_unicode_string_short

      @xls_biff7 ->
        binary_part(record_data, 6, byte_size(record_data) - 6)
        |> ExofficeString.read_byte_string_short(excel.codepage)
    end

  offset_end = rec_offset + length

  sheet = %{
    name: rec_name.value,
    offset: rec_offset,
    sheet_state: sheet_state,
    sheet_type: sheet_type,
    offset_end: offset_end
  }

  parse(stream, pos + length + 4, %{excel | sheets: excel.sheets ++ [sheet]})
end
# Concatenates the payload of the record at `pos` with the payloads of the
# CONTINUE records that follow it. `code` is the code of the record at `pos`;
# collection goes on while it equals the CONTINUE code (the caller passes that
# code for the first record to force it to be collected).
#
# Returns {data, splice_offsets, pos}: the joined payload, the running payload
# end offsets within it, and the stream position of the first record that was
# not consumed.
defp get_spliced_record_data(stream, pos, splice_offsets, data, i, code)
     when code === @xls_type_continue do
  # offset: 2; size: 2; length
  chunk_size = OLE.get_int_2d(stream, pos + 2)
  joined = data <> binary_part(stream, pos + 4, chunk_size)
  previous_end = Enum.at(splice_offsets, i - 1)
  offsets = splice_offsets ++ [previous_end + chunk_size]

  next_pos = pos + chunk_size + 4
  next_code = OLE.get_int_2d(stream, next_pos)
  get_spliced_record_data(stream, next_pos, offsets, joined, i + 1, next_code)
end

defp get_spliced_record_data(_stream, pos, splice_offsets, data, _i, _code) do
  {data, splice_offsets, pos}
end
# Reads the SST (shared string table) record together with its CONTINUE
# records and stores every string in the shared-strings table as
# {index, utf8_string}, then continues with parse/3 after the last consumed
# record.
#
# Changes from the previous version:
#   * an SST with zero strings is skipped; `0..nm - 1` is the descending
#     range 0..-1 in that case and made the loop read two bogus strings
#   * the Asian phonetic block size is read as the 4-byte field it is
#   * the formatting-run list was built but never used, so the runs are now
#     simply skipped
defp read_sst(stream, pos, %Excel2003{shared_strings_tid: tid} = excel) do
  # get spliced record data
  {record_data, splice_offsets, pos} =
    get_spliced_record_data(stream, pos, [0], <<>>, 1, @xls_type_continue)

  # offset: 4; size: 4; number of strings that follow
  string_count = OLE.get_int_4d(record_data, 4)

  if string_count > 0 do
    # the first string starts at offset 8 of the spliced payload
    Enum.reduce(0..(string_count - 1), 8, fn index, string_pos ->
      read_sst_string(record_data, splice_offsets, string_pos, index, tid)
    end)
  end

  parse(stream, pos, excel)
end

# Reads the string starting at `pos` of the spliced SST payload, stores it
# under `index` in `tid` and returns the offset of the next string.
defp read_sst_string(record_data, splice_offsets, pos, index, tid) do
  {num_chars, pos} = {OLE.get_int_2d(record_data, pos), pos + 2}
  {option_flags, pos} = {OLE.decoded_binary_at(record_data, pos), pos + 1}

  # bit: 0; mask: 0x01; 0 = compressed; 1 = uncompressed
  is_compressed = (option_flags &&& 0x01) == 0
  # bit: 2; mask: 0x04; 0 = ordinary; 1 = Asian phonetic
  has_asian = (option_flags &&& 0x04) != 0
  # bit: 3; mask: 0x08; 0 = ordinary; 1 = Rich-Text
  has_rich_text = (option_flags &&& 0x08) != 0

  # number of Rich-Text formatting runs
  {formatting_runs, pos} =
    if has_rich_text, do: {OLE.get_int_2d(record_data, pos), pos + 2}, else: {0, pos}

  # size of Asian phonetic setting
  {extended_run_length, pos} =
    if has_asian, do: {OLE.get_int_4d(record_data, pos), pos + 4}, else: {0, pos}

  len = if is_compressed, do: num_chars, else: num_chars * 2
  # end of the record fragment that contains `pos`
  limit_pos = splice_offsets |> Enum.drop_while(&(pos > &1)) |> List.first()

  {ret_str, is_compressed, pos} =
    if pos + len <= limit_pos do
      {binary_part(record_data, pos, len), is_compressed, pos + len}
    else
      # character array is split between records
      # first part of character array
      bytes_read = limit_pos - pos
      first_part = binary_part(record_data, pos, bytes_read)
      # remaining characters in Unicode string
      chars_left = num_chars - (if is_compressed, do: bytes_read, else: bytes_read / 2)
      get_ret_str(record_data, splice_offsets, first_part, chars_left, limit_pos, is_compressed)
    end

  # convert to UTF-8
  ret_str = ExofficeString.encode_utf_16(ret_str, is_compressed)

  # skip the formatting runs (4 bytes each) and the phonetic block, if any
  pos = pos + 4 * formatting_runs + extended_run_length

  IO.puts "sst: #{inspect {index, ret_str}}"
  :ets.insert(tid, {index, ret_str})
  pos
end
# Collects the remainder of a string whose character array continues in the
# next record fragment(s). Each continuation starts with an option byte that
# says whether that fragment is compressed (1 byte per character) or not
# (2 bytes per character); when the encodings differ, the compressed part is
# widened so the result is uniformly 2 bytes per character.
#
# Returns {string_bytes, is_compressed, pos} where `pos` is the offset right
# after the string.
#
# The widening used index ranges (`0..len - 1`, `0..byte_size(ret_str) - 1`)
# that run backwards for empty input and read at index -1; binary
# comprehensions handle the empty case naturally.
defp get_ret_str(record_data, splice_offsets, ret_str, chars_left, pos, is_compressed) when chars_left > 0 do
  # look up next limit position, in case the string span more than one continue record
  limit_pos = splice_offsets |> Enum.drop_while(&(pos >= &1)) |> List.first()

  # repeated option flags
  # OpenOffice.org documentation 5.21
  {option, pos} = {OLE.decoded_binary_at(record_data, pos), pos + 1}
  fragment_compressed = option == 0

  {ret_str, chars_left, is_compressed, len} =
    case {is_compressed, fragment_compressed} do
      {true, true} ->
        # 1st fragment compressed, this fragment compressed
        len = round(min(chars_left, limit_pos - pos))
        {ret_str <> binary_part(record_data, pos, len), chars_left - len, true, len}

      {false, false} ->
        # 1st fragment uncompressed, this fragment uncompressed
        len = round(min(chars_left * 2, limit_pos - pos))
        {ret_str <> binary_part(record_data, pos, len), round(chars_left - len / 2), false, len}

      {false, true} ->
        # 1st fragment uncompressed, this fragment compressed
        len = round(min(chars_left, limit_pos - pos))
        widened = widen_to_two_bytes(binary_part(record_data, pos, len))
        {ret_str <> widened, chars_left - len, false, len}

      {true, false} ->
        # 1st fragment compressed, this fragment uncompressed
        len = round(min(chars_left * 2, limit_pos - pos))
        joined = widen_to_two_bytes(ret_str) <> binary_part(record_data, pos, len)
        {joined, round(chars_left - len / 2), false, len}
    end

  get_ret_str(record_data, splice_offsets, ret_str, chars_left, pos + len, is_compressed)
end

defp get_ret_str(_record_data, _splice_offsets, ret_str, _, pos, is_compressed) do
  {ret_str, is_compressed, pos}
end

# Turns 1-byte characters into 2-byte little-endian ones by appending a zero
# byte to each.
defp widen_to_two_bytes(bytes) do
  for <<byte <- bytes>>, into: <<>>, do: <<byte, 0>>
end
# An EOF record ends the current substream: stop recursing and hand back the
# parser state untouched.
defp read_eof(stream, pos, excel) do
  {stream, pos, excel}
end
# Looks up the shared string stored under `index` in table `tid` and returns
# the second element of the first matching entry.
defp get_shared_string(tid, index) do
  first_entry = List.first(:ets.lookup(tid, index))
  elem(first_entry, 1)
end
# Returns the symbolic name of a record code (used for debug output), or nil
# when the code is not in the table.
defp get_code(code) do
  names = %{
    1024 => "REKEY_BLOCK",
    1280 => "XLS_BIFF7",
    1536 => "XLS_BIFF8",
    545 => "XLS_TYPE_ARRAY",
    513 => "XLS_TYPE_BLANK",
    2057 => "XLS_TYPE_BOF",
    517 => "XLS_TYPE_BOOLERR",
    41 => "XLS_TYPE_BOTTOMMARGIN",
    66 => "XLS_TYPE_CODEPAGE",
    125 => "XLS_TYPE_COLINFO",
    60 => "XLS_TYPE_CONTINUE",
    446 => "XLS_TYPE_DATAVALIDATION",
    434 => "XLS_TYPE_DATAVALIDATIONS",
    34 => "XLS_TYPE_DATEMODE",
    215 => "XLS_TYPE_DBCELL",
    549 => "XLS_TYPE_DEFAULTROWHEIGHT",
    85 => "XLS_TYPE_DEFCOLWIDTH",
    24 => "XLS_TYPE_DEFINEDNAME",
    512 => "XLS_TYPE_DIMENSION",
    10 => "XLS_TYPE_EOF",
    430 => "XLS_TYPE_EXTERNALBOOK",
    35 => "XLS_TYPE_EXTERNNAME",
    23 => "XLS_TYPE_EXTERNSHEET",
    255 => "XLS_TYPE_EXTSST",
    47 => "XLS_TYPE_FILEPASS",
    49 => "XLS_TYPE_FONT",
    21 => "XLS_TYPE_FOOTER",
    1054 => "XLS_TYPE_FORMAT",
    6 => "XLS_TYPE_FORMULA",
    131 => "XLS_TYPE_HCENTER",
    20 => "XLS_TYPE_HEADER",
    27 => "XLS_TYPE_HORIZONTALPAGEBREAKS",
    440 => "XLS_TYPE_HYPERLINK",
    127 => "XLS_TYPE_IMDATA",
    523 => "XLS_TYPE_INDEX",
    516 => "XLS_TYPE_LABEL",
    253 => "XLS_TYPE_LABELSST",
    38 => "XLS_TYPE_LEFTMARGIN",
    229 => "XLS_TYPE_MERGEDCELLS",
    236 => "XLS_TYPE_MSODRAWING",
    235 => "XLS_TYPE_MSODRAWINGGROUP",
    190 => "XLS_TYPE_MULBLANK",
    189 => "XLS_TYPE_MULRK",
    28 => "XLS_TYPE_NOTE",
    515 => "XLS_TYPE_NUMBER",
    93 => "XLS_TYPE_OBJ",
    99 => "XLS_TYPE_OBJECTPROTECT",
    2187 => "XLS_TYPE_PAGELAYOUTVIEW",
    161 => "XLS_TYPE_PAGESETUP",
    146 => "XLS_TYPE_PALETTE",
    65 => "XLS_TYPE_PANE",
    19 => "XLS_TYPE_PASSWORD",
    43 => "XLS_TYPE_PRINTGRIDLINES",
    18 => "XLS_TYPE_PROTECT",
    2152 => "XLS_TYPE_RANGEPROTECTION",
    39 => "XLS_TYPE_RIGHTMARGIN",
    638 => "XLS_TYPE_RK",
    520 => "XLS_TYPE_ROW",
    221 => "XLS_TYPE_SCENPROTECT",
    160 => "XLS_TYPE_SCL",
    29 => "XLS_TYPE_SELECTION",
    1212 => "XLS_TYPE_SHAREDFMLA",
    133 => "XLS_TYPE_SHEET",
    2146 => "XLS_TYPE_SHEETLAYOUT",
    129 => "XLS_TYPE_SHEETPR",
    2151 => "XLS_TYPE_SHEETPROTECTION",
    252 => "XLS_TYPE_SST",
    519 => "XLS_TYPE_STRING",
    659 => "XLS_TYPE_STYLE",
    40 => "XLS_TYPE_TOPMARGIN",
    438 => "XLS_TYPE_TXO",
    65535 => "XLS_TYPE_UNKNOWN",
    132 => "XLS_TYPE_VCENTER",
    26 => "XLS_TYPE_VERTICALPAGEBREAKS",
    574 => "XLS_TYPE_WINDOW2",
    224 => "XLS_TYPE_XF",
    2173 => "XLS_TYPE_XFEXT",
    5 => "XLS_WORKBOOKGLOBALS",
    16 => "XLS_WORKSHEET"
  }

  Map.get(names, code)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment