Skip to content

Instantly share code, notes, and snippets.

@smpallen99
Created April 5, 2014 18:40
Show Gist options
  • Save smpallen99/9996137 to your computer and use it in GitHub Desktop.
Save smpallen99/9996137 to your computer and use it in GitHub Desktop.
An approach to handling serialized binary data (e.g. data exchanged with a C program)
defmodule CStructure do
  @moduledoc """
  Parses and serializes binaries laid out like a packed C structure.

  The structure is described by a schema: a keyword list mapping each field
  name to a field descriptor. `build/3` turns a binary into a keyword list of
  decoded values, and `serialize/3` turns such a keyword list back into a
  binary.

  ## Field descriptors

    * `bits` (a bare integer) or `[integer: bits]` - unsigned integer; the
      size is given in bits and is converted to whole bytes when decoding
    * `[string: bytes]` - string; the size is given in bytes. All zero bytes
      are removed when decoding, and the value is zero padded (or truncated)
      to `bytes` when serializing
    * `[record: Module]` - nested structure; `Module` must provide `size/0`
      and `load/1` (for example by `use CStructure`)
    * `[list: {descriptor, count}]` - `count` consecutive fields of the same
      descriptor (a bare integer descriptor means an integer of that many
      bits). List fields can be decoded but there is no serializer for them.

  ## Options for `use CStructure`

    * `:schema` - the list of field definitions (required, must be a literal
      list)
    * `:endian` - the endianness, `:little` or `:big` (default: `:little`)

  ## Examples

      iex> schema = [one: [string: 5], two: [integer: 8], three: 32]
      [one: [string: 5], two: [integer: 8], three: 32]
      iex> binary = <<0, "ab", 0, 0>> <> <<99, 1, 2, 3, 4>>
      <<0, 97, 98, 0, 0, 99, 1, 2, 3, 4>>
      iex> CStructure.build(binary, schema)
      [one: "ab", two: 99, three: 67305985]

  Another example is how to wrap it in a record for easier access:

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
      {:module, MyDataStr, <<70, 79, 82, 49, ...>>, {:load, 1}}
      iex(2)> data = MyDataStr.load(<<0, "ab", 0, 0>> <> <<99, 1, 2, 3, 4>>)
      MyDataStr[one: "ab", two: 99, three: 67305985]
      iex(3)> data.one
      "ab"
      iex(4)> data.two
      99
      iex(5)> data.three
      67305985

  A big-endian structure:

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, endian: :big, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
  """

  @doc """
  Injects `schema/0,1`, `load/1`, `serialize/1` and `size/0,1` into the
  calling module.

  The generated `load/1` passes the decoded keyword list to the caller's
  `new/1`, and `serialize/1` calls `to_keywords` on its argument, so the
  calling module is expected to be a record.

  Throws `{:error, message}` at compile time when `:schema` is not a literal
  list or `:endian` is neither `:big` nor `:little`.
  """
  defmacro __using__(opts) do
    schema = Keyword.get(opts, :schema)
    unless is_list(schema), do: throw({:error, "schema must be a list"})

    endian = Keyword.get(opts, :endian, :little)

    unless endian in [:big, :little] do
      # `endian` is quoted code here; render it as source so that a non-atom
      # value reports this error instead of failing inside string interpolation.
      throw {:error, "unsupported endian: " <> Macro.to_string(endian)}
    end

    quote do
      import CStructure

      # The one-argument variants exist so the functions can also be called
      # on a record instance (`data.schema`, `data.size`).
      def schema(_record), do: unquote(schema)
      def schema(), do: unquote(schema)

      def load(binary) do
        fields = CStructure.build(binary, unquote(schema), unquote(endian))
        __MODULE__.new(fields)
      end

      def serialize(record) do
        CStructure.serialize(record.to_keywords(), unquote(schema), unquote(endian))
      end

      def size(_record), do: CStructure.get_size(unquote(schema))
      def size(), do: CStructure.get_size(unquote(schema))
    end
  end

  @doc """
  Parses `binary` according to the `schema` keyword list.

  Returns a keyword list of the decoded fields, in schema order. Decoding
  stops early, returning the fields decoded so far, once the binary is
  exhausted; bytes left over after the last field are ignored.
  """
  @spec build(binary(), Keyword.t(), :little | :big) :: Keyword.t()
  def build(binary, schema, endian \\ :little) when is_binary(binary) do
    do_build([], binary, schema, endian)
  end

  @doc """
  Serializes the keyword list `list` according to `schema`.

  Returns the binary data. Integer, string and record fields are supported.
  """
  @spec serialize(Keyword.t(), Keyword.t(), :little | :big) :: binary()
  def serialize(list, schema, endian \\ :little) when is_list(list) do
    do_serialize(<<>>, list, schema, endian)
  end

  @doc """
  Returns the total size of `schema` in bits.

  Throws `{:error, "invalid descriptor"}` for an unrecognised field descriptor.
  """
  @spec get_size(Keyword.t()) :: non_neg_integer()
  def get_size(schema) do
    Enum.reduce(schema, 0, fn {_name, spec}, total -> total + field_size(spec) end)
  end

  ###############
  # Private helpers

  # Size in bits of a single field descriptor.
  defp field_size(bits) when is_integer(bits), do: bits
  defp field_size([{:string, bytes}]), do: bytes * 8
  defp field_size([{:integer, bits}]), do: bits
  defp field_size([{:record, module}]), do: module.size()
  defp field_size([{:list, {bits, count}}]) when is_integer(bits), do: bits * count
  defp field_size([{:list, {spec, count}}]), do: field_size(spec) * count
  defp field_size(_), do: throw({:error, "invalid descriptor"})

  # Splits a field descriptor into {type, descriptor}; anything that is not
  # a one-element keyword list is treated as an integer size.
  defp normalize([{type, descriptor}]), do: {type, descriptor}
  defp normalize(bits), do: {:integer, bits}

  # Splits off the first `count` bytes, returning {head, rest}.
  defp split_bytes(binary, count) do
    {:binary.part(binary, 0, count), :binary.part(binary, count, byte_size(binary) - count)}
  end

  ###############
  # Loading

  # Walks the schema, decoding one field at a time. The accumulator is built
  # in reverse and flipped once at the end.
  defp do_build(acc, _binary, [], _endian), do: Enum.reverse(acc)
  defp do_build(acc, <<>>, _schema, _endian), do: Enum.reverse(acc)

  defp do_build(acc, binary, [{name, spec} | schema], endian) do
    {type, descriptor} = normalize(spec)
    {value, rest} = construct(type, binary, descriptor, endian)
    do_build([{name, value} | acc], rest, schema, endian)
  end

  # Decodes one field from the front of the binary.
  # Returns a tuple {data, remaining_binary}.
  defp construct(:list, binary, {[{type, descriptor}], count}, endian) do
    construct_many(type, binary, descriptor, count, endian, [])
  end

  defp construct(:list, binary, {bits, count}, endian) do
    construct(:list, binary, {[{:integer, bits}], count}, endian)
  end

  defp construct(:integer, binary, bits, endian) do
    {raw, rest} = split_bytes(binary, div(bits, 8))
    {:binary.decode_unsigned(raw, endian), rest}
  end

  defp construct(:string, binary, bytes, _endian) do
    {raw, rest} = split_bytes(binary, bytes)
    # Drop every zero byte, not only trailing padding.
    {:binary.replace(raw, <<0>>, <<>>, [:global]), rest}
  end

  defp construct(:record, binary, module, _endian) do
    bytes = div(module.size(), 8)

    if byte_size(binary) >= bytes do
      {raw, rest} = split_bytes(binary, bytes)
      {module.load(raw), rest}
    else
      # Not enough data for a whole record: yield nil and drop what is left.
      {nil, ""}
    end
  end

  # Decodes `count` consecutive fields of the same type. Counting down
  # explicitly means a count of zero decodes nothing.
  defp construct_many(_type, binary, _descriptor, 0, _endian, acc) do
    {Enum.reverse(acc), binary}
  end

  defp construct_many(type, binary, descriptor, count, endian, acc) when count > 0 do
    {value, rest} = construct(type, binary, descriptor, endian)
    construct_many(type, rest, descriptor, count - 1, endian, [value | acc])
  end

  ###################
  # Serialization

  # Walks the schema, appending the encoding of each field. A field is
  # removed from the keyword list once it has been written.
  defp do_serialize(acc, _fields, [], _endian), do: acc

  defp do_serialize(acc, fields, [{name, spec} | schema], endian) do
    {type, descriptor} = normalize(spec)
    encoded = deconstruct(type, Keyword.get(fields, name), descriptor, endian)
    do_serialize(acc <> encoded, Keyword.delete(fields, name), schema, endian)
  end

  # Encodes a single field value to its binary form.
  defp deconstruct(:integer, value, bits, endian) do
    unpadded = :binary.encode_unsigned(value, endian)
    padding_bits = bits - bit_size(unpadded)
    padding = <<0::size(padding_bits)>>

    # The zero padding goes on the most significant side.
    if endian == :little do
      unpadded <> padding
    else
      padding <> unpadded
    end
  end

  defp deconstruct(:string, value, bytes, _endian) do
    length = byte_size(value)

    if length < bytes do
      value <> :binary.copy(<<0>>, bytes - length)
    else
      :binary.part(value, 0, bytes)
    end
  end

  defp deconstruct(:record, record, _module, _endian) do
    record.serialize()
  end
end
Code.require_file "../../../test_helper.exs", __ENV__.file
defmodule MdseTest.CStructure do
  # Tests for CStructure: the plain build/serialize functions first, then the
  # functions injected into records by `use CStructure`. The records are
  # defined next to the tests that use them.
  use Amrita.Sweet

  describe "build" do
    it "extracts the int and short data" do
      binary = <<3, 0, 0, 0, 1, 2>>
      assert CStructure.build(binary, [int_val: 32, word_val: 16]) == [int_val: 3, word_val: 0x201]
    end
  end

  # Each serialize test round-trips a binary through build and serialize.
  describe "serialize" do
    it "handles integer data" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: 32, two: 8, three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles integer data as option" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: [integer: 32], two: [integer: 8], three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles short string data" do
      binary = <<2,3>> <> "test" <> <<0,0>>
      schema = [one: 16, two: [string: 6]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    # A value longer than the field is truncated to the field size.
    it "handles long string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = [one: 0x302, two: "testing"]
      assert CStructure.serialize(list, schema) == binary
    end
  end

  defrecord MyData, one: 0, two: 0, three: 0 do
    use CStructure, schema: [one: 32, two: 8, three: 32]
  end

  defrecord MyDataInt, one: 0, two: 0, three: 0 do
    use CStructure, schema: [one: [integer: 32], two: 8, three: 32]
  end

  defrecord MyDataStr, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32]
  end

  defrecord MyD, one: 0, two: 0 do
    use CStructure, schema: [one: 16, two: 32]
  end

  defrecord MyDataStrBig, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :big
  end

  defrecord MyDataStrLittle, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :little
  end

  it "handles new schema" do
    data = MyD.new
    assert data.schema == [one: 16, two: 32]
  end

  it "loads data" do
    binary = <<1,2,3,4,5,6>>
    data = MyD.load binary
    assert data.one == 0x201
    assert data.two == 0x6050403
  end

  it "handles loading long form" do
    data = MyData.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for integer field" do
    data = MyDataInt.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for string field" do
    data = MyDataStr.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    data2 = MyDataStr.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field little" do
    data = MyDataStrLittle.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    # Both loads go through the explicitly little-endian record.
    data2 = MyDataStrLittle.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field big" do
    data = MyDataStrBig.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x1020304
    data2 = MyDataStrBig.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x1020304
  end

  it "serializes string data" do
    binary = "steve" <> <<99, 1,2,3,4>>
    data = MyDataStr.load(binary)
    assert data.serialize == binary
  end

  defrecord LongTest, one: 0, two: 0, three: 99 do
    use CStructure, schema: [one: 32, two: 16, three: 8], endian: :big
  end

  # When the binary ends before the schema does, the remaining fields keep
  # the record defaults.
  it "loads shorter binary data" do
    msg = <<0x1020304::32, 0x506::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 99
  end

  # Bytes beyond the end of the schema are ignored.
  it "loads longer binary data" do
    msg = <<0x1020304::32, 0x506::16, 0x99ff::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 0x99
  end

  defrecord LongStrTest, one: 0, two: "", three: 99 do
    use CStructure, schema: [one: 32, two: [string: 5], three: 8], endian: :big
  end

  it "loads shorter string data" do
    msg = <<1::32, "abcde">>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 99
  end

  it "loads longer string data" do
    msg = <<1::32, "abcde", 0x88ff::16>>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 0x88
  end

  defrecord MyMixed, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: [integer: 32]]
  end

  defrecord SubData, one: 0, two: 0 do
    use CStructure, endian: :big, schema: [ one: 16, two: 16 ]
  end

  defrecord Nested, one: 0, nested: nil, three: 0 do
    use CStructure, endian: :big, schema: [
      one: 32,
      nested: [record: SubData],
      three: 16
    ]
  end

  # Sizes are reported in bits.
  describe "size" do
    it "returns size of integer structure" do
      assert MyData.size == 72
    end

    it "returns size of mixed string integer structure" do
      assert MyMixed.size == 80
    end

    it "returns size of nested record" do
      assert Nested.size == 80
    end
  end

  it "loads nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    data.one |> 0xffffffff
    data.three |> 0x1010
    data.nested.one |> 1
    data.nested.two |> 2
  end

  it "serializes nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    assert data.serialize == msg
  end

  defrecord MyListInt, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {16, 2}]], endian: :big
  end

  it "loads list" do
    msg = <<0x1234::32, 1::16, 2::16>>
    data = MyListInt.load msg
    assert data == MyListInt.new([{:one, 0x1234}, {:two, [0x0001, 0x0002]}])
    assert data.one == 0x1234
    assert Enum.at(data.two, 0) == 1
    assert Enum.at(data.two, 1) == 2
  end

  defrecord MyListStr, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {[string: 2], 3}]], endian: :big
  end

  it "gets size of string array" do
    assert MyListStr.size == 80
  end

  it "load list of strings" do
    msg = <<0x4321::32, "abcdef">>
    data = MyListStr.load msg
    assert data.one == 0x4321
    assert Enum.at(data.two, 0) == "ab"
    assert Enum.at(data.two, 1) == "cd"
    assert Enum.at(data.two, 2) == "ef"
  end

  defrecord SubData2, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 16 ]
  end

  defrecord MyListRecords, one: 0, two: [], three: 0 do
    use CStructure, schema: [one: 32, two: [list: {[record: SubData2], 3}], three: 8],
      endian: :big
  end

  it "gets size of record array" do
    assert MyListRecords.size == 136
  end

  it "loads a list of records" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
      0x102::16, 0x103::16,
      0xf104::16, 0x9823::16,
      0xfe::8>>
    data = MyListRecords.load msg
    assert data.one == 0x9999
    assert Enum.at(data.two, 0).f1 == 0x100
    assert Enum.at(data.two, 0).f2 == 0x101
    assert Enum.at(data.two, 1).f1 == 0x102
    assert Enum.at(data.two, 1).f2 == 0x103
    assert Enum.at(data.two, 2).f1 == 0xf104
    assert Enum.at(data.two, 2).f2 == 0x9823
    assert data.three == 0xfe
  end

  # Only two of the three records are present; the missing one loads as nil.
  it "loads a subset of the array" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
      0x102::16, 0x103::16>>
    data = MyListRecords.load msg
    Enum.count(data.two) |> 3
    List.last(data.two) |> nil
    data.one |> 0x9999
    Enum.at(data.two, 0).f1 |> 0x100
    Enum.at(data.two, 0).f2 |> 0x101
    Enum.at(data.two, 1).f1 |> 0x102
    Enum.at(data.two, 1).f2 |> 0x103
  end

  defrecord SubData3, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 8 ]
  end

  defrecord MyListRecords2, one: [], two: [], three: 0 do
    use CStructure, schema: [one: [list: {[integer: 8], 4}],
      two: [list: {[record: SubData3], 3}],
      three: [list: {[string: 4], 2}]],
      endian: :big
  end

  it "gets the size of record and string lists" do
    MyListRecords2.size |> 168
  end

  it "load a list of records and strings" do
    msg = <<0xfefdfcfb::32, 1::16, 2::8, 0xabcd::16, 0xbd::8, 0xffff::16, 0xdd::8,
      "abcd", "good">>
    data = MyListRecords2.load msg
    Enum.at(data.one, 0) |> 0xfe
    Enum.at(data.one, 1) |> 0xfd
    Enum.at(data.one, 2) |> 0xfc
    Enum.at(data.one, 3) |> 0xfb
    Enum.at(data.two, 0).f1 |> 1
    Enum.at(data.two, 0).f2 |> 2
    Enum.at(data.two, 1).f1 |> 0xabcd
    Enum.at(data.two, 1).f2 |> 0xbd
    Enum.at(data.two, 2).f1 |> 0xffff
    Enum.at(data.two, 2).f2 |> 0xdd
    Enum.at(data.three, 0) |> "abcd"
    Enum.at(data.three, 1) |> "good"
  end

  defrecord SubDataTop, sub_data: nil, two: 0 do
    use CStructure, endian: :big, schema: [sub_data: [list: {[record: SubData3], 2}], two: 16]
    # The record comes last so this can be called as `record.get_sub_data(inx)`.
    def get_sub_data(inx, r), do: Enum.at(r.sub_data, inx)
  end

  defrecord DoubleNested, one: 0, top: nil do
    use CStructure, endian: :big, schema: [one: 32, top: [record: SubDataTop]]
  end

  it "loads 2nd level nested structure list" do
    bin_sub_3_1 = <<0x1234::16, 0xaa::8>>
    bin_sub_3_2 = <<0x4321::16, 0x22::8>>
    bin_top = <<bin_sub_3_1::binary, bin_sub_3_2::binary, 0x99::16>>
    bin_msg = <<0x90909090::32, bin_top::binary>>
    data = DoubleNested.load bin_msg
    data.one |> 0x90909090
    data.top.two |> 0x99
    data.top.get_sub_data(0).f1 |>0x1234
    data.top.get_sub_data(0).f2 |> 0xaa
    data.top.get_sub_data(1).f2 |> 0x22
  end
end
@chendo
Copy link

chendo commented Jul 5, 2014

Noticed it didn't do bitfields (which is what I was really after) but I had no luck trying to hack it in :(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment