Skip to content

Instantly share code, notes, and snippets.

@smpallen99
Created April 5, 2014 18:40
Show Gist options
  • Save smpallen99/9996137 to your computer and use it in GitHub Desktop.
Save smpallen99/9996137 to your computer and use it in GitHub Desktop.
An approach to handling serialized binary data (i.e. with a C program)
defmodule CStructure do
  @moduledoc """
  Parses and serializes binaries that mirror a C structure sent over the wire.

  The structure is described by a schema: a keyword list mapping each field
  name to a descriptor. Decoding returns a keyword list of field name and
  decoded value, in schema order.

  Descriptors:

    * `bits` - an unsigned integer of `bits` bits (the default type)
    * `[integer: bits]` - same as above, long form
    * `[string: bytes]` - a fixed-width string; every 0 byte is removed on load
      and the value is 0-padded (or truncated) to `bytes` on serialize
    * `[record: Module]` - a nested record whose module also uses `CStructure`
    * `[list: {descriptor, count}]` - `count` consecutive elements; `descriptor`
      is either a bit width (integers) or one of the long forms above

  Options (for `use CStructure`):

    * `:schema` - the list of field definitions (required)
    * `:endian` - the integer byte order, `:little` or `:big`, default `:little`

  Notes:

    * integer lengths are specified in bits and are read in whole bytes
      (`div(bits, 8)`)
    * string lengths are specified in bytes
    * decoding stops as soon as the binary is exhausted, so trailing fields
      that have no data are left out of the result
    * serialization of `:list` fields is not implemented

  ## Examples

      iex> schema = [one: [string: 5], two: [integer: 8], three: 32]
      iex> binary = <<0, "ab", 0, 0>> <> <<99, 1, 2, 3, 4>>
      iex> CStructure.build(binary, schema)
      [one: "ab", two: 99, three: 67305985]

  Wrapping the schema in a record gives easier field access:

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
      iex(2)> data = MyDataStr.load(<<0, "ab", 0, 0>> <> <<99, 1, 2, 3, 4>>)
      MyDataStr[one: "ab", two: 99, three: 67305985]
      iex(3)> data.one
      "ab"
      iex(4)> data.two
      99
      iex(5)> data.three
      67305985

  Big-endian integers are selected with the `:endian` option:

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, endian: :big, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
  """

  @doc """
  Injects `schema/0,1`, `load/1`, `serialize/1` and `size/0,1` into the using
  record module.

  Raises `ArgumentError` at compile time when `:schema` is not a list or
  `:endian` is neither `:big` nor `:little`.
  """
  defmacro __using__(opts) do
    schema = Keyword.get(opts, :schema)
    endian = Keyword.get(opts, :endian, :little)

    if not is_list(schema) do
      raise ArgumentError, message: "schema must be a list"
    end

    if not (endian in [:big, :little]) do
      # inspect/1 keeps the message printable even when the option is not an atom.
      raise ArgumentError, message: "unsupported endian: #{inspect(endian)}"
    end

    quote do
      import CStructure

      def schema(_record), do: unquote(schema)
      def schema(), do: unquote(schema)

      def load(binary) do
        __MODULE__.new(CStructure.build(binary, unquote(schema), unquote(endian)))
      end

      def serialize(record) do
        CStructure.serialize(record.to_keywords(), unquote(schema), unquote(endian))
      end

      # The size depends on the schema only; the record argument exists so the
      # function can be called on an instance.
      def size(_record), do: size()
      def size(), do: CStructure.get_size(unquote(schema))
    end
  end

  @doc """
  Parses `binary` according to the `schema` keyword list.

  Returns a keyword list of the parsed data. Fields for which no data is left
  in `binary` are omitted; bytes beyond the schema are ignored.
  """
  def build(binary, schema, endian \\ :little) when is_binary(binary) do
    do_build([], binary, schema, endian)
  end

  @doc """
  Serializes the keyword list `list` according to `schema`.

  Returns the binary data. Raises `ArgumentError` when an integer value does
  not fit in its field.
  """
  def serialize(list, schema, endian \\ :little) when is_list(list) do
    do_serialize(<<>>, list, schema, endian)
  end

  @doc """
  Returns the total size of `schema` in bits.

  Throws `{:error, "invalid descriptor"}` for a descriptor it does not know.
  """
  def get_size(schema) do
    Enum.reduce(schema, 0, fn {_name, descriptor}, total ->
      total + field_bits(descriptor)
    end)
  end

  ###############
  # Private helpers

  # Size in bits of a single field descriptor.
  defp field_bits(bits) when is_integer(bits), do: bits
  defp field_bits([{:string, bytes}]), do: bytes * 8
  defp field_bits([{:integer, bits}]), do: bits
  defp field_bits([{:record, module}]), do: module.size()
  defp field_bits([{:list, {bits, count}}]) when is_integer(bits), do: bits * count
  defp field_bits([{:list, {descriptor, count}}]), do: field_bits(descriptor) * count
  defp field_bits(_invalid), do: throw({:error, "invalid descriptor"})

  # Normalizes a descriptor to {type, size}; a bare size means an integer field.
  defp expand([{type, size}]), do: {type, size}
  defp expand(size), do: {:integer, size}

  # Splits a binary into its first `count` bytes and the remainder.
  defp split_bytes(binary, count) do
    {:binary.part(binary, 0, count), :binary.part(binary, count, byte_size(binary) - count)}
  end

  ###############
  # Loading

  # Walks the schema, decoding one field per step. Fields are accumulated in
  # reverse and flipped once at the end to avoid repeated list appends.
  defp do_build(acc, _binary, [], _endian), do: Enum.reverse(acc)
  defp do_build(acc, <<>>, _schema, _endian), do: Enum.reverse(acc)

  defp do_build(acc, binary, [{name, descriptor} | tail], endian) do
    {type, size} = expand(descriptor)
    {value, rest} = construct(type, binary, size, endian)
    do_build([{name, value} | acc], rest, tail, endian)
  end

  # Decodes the data of one schema entry.
  # Returns a tuple {data, remaining_binary}.
  defp construct(:list, binary, {[{_type, _size}], 0}, _endian), do: {[], binary}

  defp construct(:list, binary, {[{type, size}], count}, endian) when count > 0 do
    Enum.map_reduce(1..count, binary, fn _index, rest ->
      construct(type, rest, size, endian)
    end)
  end

  defp construct(:list, binary, {bits, count}, endian) when is_integer(bits) do
    construct(:list, binary, {[{:integer, bits}], count}, endian)
  end

  defp construct(:integer, binary, bits, endian) do
    {raw, rest} = split_bytes(binary, div(bits, 8))
    {:binary.decode_unsigned(raw, endian), rest}
  end

  defp construct(:string, binary, bytes, _endian) do
    {raw, rest} = split_bytes(binary, bytes)
    # Every 0 byte is dropped, not just trailing padding.
    {:binary.replace(raw, <<0>>, <<>>, [:global]), rest}
  end

  defp construct(:record, binary, module, _endian) do
    bytes = div(module.size(), 8)

    if byte_size(binary) >= bytes do
      {raw, rest} = split_bytes(binary, bytes)
      {module.load(raw), rest}
    else
      # Not enough data for a whole record: yield nil and drop the leftover bytes.
      {nil, ""}
    end
  end

  ###################
  # Serialization

  defp do_serialize(acc, _list, [], _endian), do: acc

  defp do_serialize(acc, list, [{name, descriptor} | tail], endian) do
    {type, size} = expand(descriptor)
    {binary, rest} = deconstruct(type, list, name, size, endian)
    do_serialize(acc <> binary, rest, tail, endian)
  end

  # Encodes the value stored under `name`.
  # Returns a tuple {binary, list_without_name}.
  defp deconstruct(:integer, list, name, bits, endian) do
    unpadded = :binary.encode_unsigned(Keyword.get(list, name), endian)
    padding_bits = bits - bit_size(unpadded)

    if padding_bits < 0 do
      raise ArgumentError,
        message: "value of #{inspect(name)} does not fit in #{bits} bits"
    end

    padding = <<0::size(padding_bits)>>

    # Zero padding goes on the most significant side of the encoded number.
    binary =
      if endian == :little do
        unpadded <> padding
      else
        padding <> unpadded
      end

    {binary, Keyword.delete(list, name)}
  end

  defp deconstruct(:string, list, name, bytes, _endian) do
    value = Keyword.get(list, name)
    missing = bytes - byte_size(value)

    binary =
      if missing > 0 do
        value <> :binary.copy(<<0>>, missing)
      else
        :binary.part(value, 0, bytes)
      end

    {binary, Keyword.delete(list, name)}
  end

  defp deconstruct(:record, list, name, _module, _endian) do
    record = Keyword.get(list, name)
    {record.serialize(), Keyword.delete(list, name)}
  end
end
Code.require_file "../../../test_helper.exs", __ENV__.file
defmodule MdseTest.CStructure do
  # Exercises CStructure directly (build/serialize) and through records that
  # `use CStructure`. Bare `actual |> expected` lines rely on Amrita.Sweet.
  use Amrita.Sweet

  describe "build" do
    it "extracts the int and short data" do
      binary = <<3, 0, 0, 0, 1, 2>>
      assert CStructure.build(binary, [int_val: 32, word_val: 16]) == [int_val: 3, word_val: 0x201]
    end
  end

  describe "serialize" do
    it "handles integer data" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: 32, two: 8, three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles integer data as option" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: [integer: 32], two: [integer: 8], three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles short string data" do
      binary = <<2,3>> <> "test" <> <<0,0>>
      schema = [one: 16, two: [string: 6]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles long string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = [one: 0x302, two: "testing"]
      assert CStructure.serialize(list, schema) == binary
    end
  end

  # Record fixtures: short-form integers, long-form integers, strings and
  # explicit endianness.
  defrecord MyData, one: 0, two: 0, three: 0 do
    use CStructure, schema: [one: 32, two: 8, three: 32]
  end

  defrecord MyDataInt, one: 0, two: 0, three: 0 do
    use CStructure, schema: [one: [integer: 32], two: 8, three: 32]
  end

  defrecord MyDataStr, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32]
  end

  defrecord MyD, one: 0, two: 0 do
    use CStructure, schema: [one: 16, two: 32]
  end

  defrecord MyDataStrBig, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :big
  end

  defrecord MyDataStrLittle, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :little
  end

  it "handles new schema" do
    data = MyD.new
    assert data.schema == [one: 16, two: 32]
  end

  it "loads data" do
    binary = <<1,2,3,4,5,6>>
    data = MyD.load binary
    assert data.one == 0x201
    assert data.two == 0x6050403
  end

  it "handles loading long form" do
    data = MyData.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for integer field" do
    data = MyDataInt.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for string field" do
    data = MyDataStr.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    data2 = MyDataStr.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field little" do
    data = MyDataStrLittle.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    # Both loads go through the explicitly little-endian record.
    data2 = MyDataStrLittle.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field big" do
    data = MyDataStrBig.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x1020304
    data2 = MyDataStrBig.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x1020304
  end

  it "serializes string data" do
    binary = "steve" <> <<99, 1,2,3,4>>
    data = MyDataStr.load(binary)
    assert data.serialize == binary
  end

  defrecord LongTest, one: 0, two: 0, three: 99 do
    use CStructure, schema: [one: 32, two: 16, three: 8], endian: :big
  end

  # When the binary ends early the remaining fields keep the record defaults.
  it "loads shorter binary data" do
    msg = <<0x1020304::32, 0x506::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 99
  end

  it "loads longer binary data" do
    msg = <<0x1020304::32, 0x506::16, 0x99ff::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 0x99
  end

  defrecord LongStrTest, one: 0, two: "", three: 99 do
    use CStructure, schema: [one: 32, two: [string: 5], three: 8], endian: :big
  end

  it "loads shorter string data" do
    msg = <<1::32, "abcde">>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 99
  end

  it "loads longer string data" do
    msg = <<1::32, "abcde", 0x88ff::16>>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 0x88
  end

  defrecord MyMixed, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: [integer: 32]]
  end

  defrecord SubData, one: 0, two: 0 do
    use CStructure, endian: :big, schema: [ one: 16, two: 16 ]
  end

  defrecord Nested, one: 0, nested: nil, three: 0 do
    use CStructure, endian: :big, schema: [
      one: 32,
      nested: [record: SubData],
      three: 16
    ]
  end

  # Sizes are reported in bits.
  describe "size" do
    it "returns size of integer structure" do
      assert MyData.size == 72
    end

    it "returns size of mixed string integer structure" do
      assert MyMixed.size == 80
    end

    it "returns size of nested record" do
      assert Nested.size == 80
    end
  end

  it "loads nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    data.one |> 0xffffffff
    data.three |> 0x1010
    data.nested.one |> 1
    data.nested.two |> 2
  end

  it "serilizes nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    assert data.serialize == msg
  end

  defrecord MyListInt, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {16, 2}]], endian: :big
  end

  it "loads list" do
    msg = <<0x1234::32, 1::16, 2::16>>
    data = MyListInt.load msg
    assert data == MyListInt.new([{:one, 0x1234}, {:two, [0x0001, 0x0002]}])
    assert data.one == 0x1234
    assert Enum.at(data.two, 0) == 1
    assert Enum.at(data.two, 1) == 2 # review this test case
  end

  defrecord MyListStr, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {[string: 2], 3}]], endian: :big
  end

  it "gets size of string array" do
    assert MyListStr.size == 80
  end

  it "load list of strings" do
    msg = <<0x4321::32, "abcdef">>
    data = MyListStr.load msg
    assert data.one == 0x4321
    assert Enum.at(data.two, 0) == "ab"
    assert Enum.at(data.two, 1) == "cd"
    assert Enum.at(data.two, 2) == "ef"
  end

  defrecord SubData2, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 16 ]
  end

  defrecord MyListRecords, one: 0, two: [], three: 0 do
    use CStructure, schema: [one: 32, two: [list: {[record: SubData2], 3}], three: 8],
      endian: :big
  end

  it "gets size of record array" do
    assert MyListRecords.size == 136
  end

  it "loads a list of records" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
            0x102::16, 0x103::16,
            0xf104::16, 0x9823::16,
            0xfe::8>>
    data = MyListRecords.load msg
    assert data.one == 0x9999
    assert Enum.at(data.two, 0).f1 == 0x100
    assert Enum.at(data.two, 0).f2 == 0x101
    assert Enum.at(data.two, 1).f1 == 0x102
    assert Enum.at(data.two, 1).f2 == 0x103
    assert Enum.at(data.two, 2).f1 == 0xf104
    assert Enum.at(data.two, 2).f2 == 0x9823
    assert data.three == 0xfe
  end

  # A record element with no data left is loaded as nil.
  it "loads a subset of the array" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
            0x102::16, 0x103::16>>
    data = MyListRecords.load msg
    Enum.count(data.two) |> 3
    List.last(data.two) |> nil
    data.one |> 0x9999
    Enum.at(data.two, 0).f1 |> 0x100
    Enum.at(data.two, 0).f2 |> 0x101
    Enum.at(data.two, 1).f1 |> 0x102
    Enum.at(data.two, 1).f2 |> 0x103
  end

  defrecord SubData3, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 8 ]
  end

  defrecord MyListRecords2, one: [], two: [], three: 0 do
    use CStructure, schema: [one: [list: {[integer: 8], 4}],
                             two: [list: {[record: SubData3], 3}],
                             three: [list: {[string: 4], 2}]],
      endian: :big
  end

  it "gets the size of record and string lists" do
    MyListRecords2.size |> 168
  end

  it "load a list of records and strings" do
    msg = <<0xfefdfcfb::32, 1::16, 2::8, 0xabcd::16, 0xbd::8, 0xffff::16, 0xdd::8,
            "abcd", "good">>
    data = MyListRecords2.load msg
    Enum.at(data.one, 0) |> 0xfe
    Enum.at(data.one, 1) |> 0xfd
    Enum.at(data.one, 2) |> 0xfc
    Enum.at(data.one, 3) |> 0xfb
    Enum.at(data.two, 0).f1 |> 1
    Enum.at(data.two, 0).f2 |> 2
    Enum.at(data.two, 1).f1 |> 0xabcd
    Enum.at(data.two, 1).f2 |> 0xbd
    Enum.at(data.two, 2).f1 |> 0xffff
    Enum.at(data.two, 2).f2 |> 0xdd
    Enum.at(data.three, 0) |> "abcd"
    Enum.at(data.three, 1) |> "good"
  end

  defrecord SubDataTop, sub_data: nil, two: 0 do
    use CStructure, endian: :big, schema: [sub_data: [list: {[record: SubData3], 2}], two: 16]
    def get_sub_data(inx, r), do: Enum.at(r.sub_data, inx)
  end

  defrecord DoubleNested, one: 0, top: nil do
    use CStructure, endian: :big, schema: [one: 32, top: [record: SubDataTop]]
  end

  it "loads 2nd level nested structure list" do
    bin_sub_3_1 = <<0x1234::16, 0xaa::8>>
    bin_sub_3_2 = <<0x4321::16, 0x22::8>>
    bin_top = <<bin_sub_3_1::binary, bin_sub_3_2::binary, 0x99::16>>
    bin_msg = <<0x90909090::32, bin_top::binary>>
    data = DoubleNested.load bin_msg
    data.one |> 0x90909090
    data.top.two |> 0x99
    data.top.get_sub_data(0).f1 |>0x1234
    data.top.get_sub_data(0).f2 |> 0xaa
    data.top.get_sub_data(1).f2 |> 0x22
  end
end
@smpallen99
Copy link
Author

I'm using the following approach to deal with binary data from socket connections. This still has some work to do (e.g., serialization does not work for lists yet). I will likely create this as a separate package and publish it on GitHub when I have some time.

@chendo
Copy link

chendo commented Jul 5, 2014

Are you going to turn this into a library? I've been looking all over for something like this!

@chendo
Copy link

chendo commented Jul 5, 2014

Noticed it didn't do bitfields (which is what I was really after) but I had no luck trying to hack it in :(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment