Skip to content

Instantly share code, notes, and snippets.

@seriyps
Created September 2, 2013 23:23
Show Gist options
  • Save seriyps/6418131 to your computer and use it in GitHub Desktop.
Save seriyps/6418131 to your computer and use it in GitHub Desktop.
Gettext .mo format parser for Erlang
%%% @author Sergey Prokhorov <me@seriyps.ru>
%%% @copyright (C) 2013, Sergey Prokhorov
%%% @doc
%%% Simple gettext .mo file format parser for Erlang.
%%%
%%% Produces a [{KeyPlurals::[binary()], TransPlurals::[binary()]}] orddict as
%%% output (call to_dict/1 on the state returned by parse/1 to obtain it).
%%% E.g., for a .po file (converted to .mo)
%%% <pre>
%%% msgid "Download"
%%% msgstr "Скачать"
%%%
%%% msgid "Stone"
%%% msgid_plural "Stones"
%%% msgstr[0] "Камень"
%%% msgstr[1] "Камня"
%%% msgstr[2] "Камней"
%%% </pre>
%%% it will produce
%%% <pre><code>
%%% [{[<<"Download">>], [<<"Скачать">>]},
%%% {[<<"Stone">>, <<"Stones">>], [<<"Камень">>, <<"Камня">>, <<"Камней">>]}]
%%% </code></pre>
%%% TODO: simple MIME parser (for key "" - translation metadata)
%%% TODO: plural form expression interpreter
%%% @end
%%% Created : 3 Sep 2013 by Sergey Prokhorov <me@seriyps.ru>
-module(mo_parser).
-export([parse/1, to_dict/1]).
%% Parser state threaded through every parsing step.
%%
%% Only `bin' and `obin' are supplied when the record is first built in
%% parse/1; the byte order, version, message count and table offsets stay
%% `undefined' until parse_magick/1 and parse_meta/1 fill them in. Record
%% field types do not include `undefined' implicitly (since OTP 19), so it is
%% spelled out here to keep the declaration truthful for Dialyzer.
-record(st,
        {bin :: binary(),                               % header bytes not consumed yet
         obin :: binary(),                              % the complete original input
         catalog = [] :: [{[binary()], [binary()]}],    % {Originals, Translations} pairs
         bo :: undefined | little | big,                % byte order chosen by the magic number
         version :: undefined | integer(),              % 32-bit value following the magic number
         msg_cnt :: undefined | integer(),              % number of catalog entries
         orig_tab_offset :: undefined | integer(),      % offset of the originals table in obin
         trans_tab_offset :: undefined | integer()}).   % offset of the translations table in obin
%% @doc Parse a gettext .mo catalog.
%%
%% The argument is either the raw contents of a .mo file (a binary) or a file
%% name given as a list, in which case the file is read with file:read_file/1
%% first (a failed read crashes with `badmatch').
%%
%% Returns the final `#st{}' parser state; use to_dict/1 to get the catalog.
parse(Bin) when is_binary(Bin) ->
    Header = parse_magick(#st{bin = Bin, obin = Bin}),
    parse_catalog(parse_meta(Header), 0);
parse(Name) when is_list(Name) ->
    {ok, Contents} = file:read_file(Name),
    parse(Contents).
%% @doc Return the catalog stored in a parser state produced by parse/1:
%% a list of `{Originals, Translations}' pairs.
to_dict(#st{} = Parsed) ->
    Parsed#st.catalog.
%% Detect the byte order from the magic number 16#950412de at the start of the
%% unconsumed input and read the 32-bit version word that follows it.
%% The magic number is written out byte by byte: DE 12 04 95 when stored
%% little-endian, 95 04 12 DE when stored big-endian.
%% Any other prefix matches no clause and fails with `function_clause'.
parse_magick(#st{bin = <<16#de, 16#12, 16#04, 16#95,
                         Version:32/little, Tail/binary>>} = State) ->
    State#st{bo = little, version = Version, bin = Tail};
parse_magick(#st{bin = <<16#95, 16#04, 16#12, 16#de,
                         Version:32/big, Tail/binary>>} = State) ->
    State#st{bo = big, version = Version, bin = Tail}.
%% Read the three 32-bit unsigned words that follow the version: the number of
%% messages, the offset of the originals table and the offset of the
%% translations table. They are decoded in the byte order already stored in
%% `bo', and the consumed bytes are dropped from `bin'.
%% Fails with `function_clause' when `bo' is neither `little' nor `big' or
%% fewer than 12 bytes remain.
parse_meta(#st{bo = Bo,
               bin = <<CntBytes:4/binary, OrigBytes:4/binary,
                       TransBytes:4/binary, Tail/binary>>} = State)
  when Bo =:= little; Bo =:= big ->
    Word = fun(Bytes) -> binary:decode_unsigned(Bytes, Bo) end,
    State#st{msg_cnt = Word(CntBytes),
             orig_tab_offset = Word(OrigBytes),
             trans_tab_offset = Word(TransBytes),
             bin = Tail}.
%% Collect catalog entries starting at index N.
%% Each step looks up entry N in both the originals table and the translations
%% table of the original input (`obin') and prepends the resulting pair to the
%% accumulator. Once N equals `msg_cnt' the accumulator is reversed so entries
%% appear in table order, and the updated state is returned.
parse_catalog(#st{msg_cnt = Total, catalog = Acc} = State, N) ->
    case N of
        Total ->
            State#st{catalog = lists:reverse(Acc)};
        _ ->
            #st{orig_tab_offset = OrigTab, trans_tab_offset = TransTab,
                obin = Whole, bo = ByteOrder} = State,
            Originals = get_string(N, OrigTab, Whole, ByteOrder),
            Translations = get_string(N, TransTab, Whole, ByteOrder),
            parse_catalog(State#st{catalog = [{Originals, Translations} | Acc]},
                          N + 1)
    end.
%% Fetch the N-th entry of the string table that starts at byte offset O of Bin.
%% Every table entry is 8 bytes: a 32-bit length followed by a 32-bit offset
%% of the string data within Bin, both stored in the given byte order.
%% Returns the string split into its NUL-separated parts (see get_strings/3).
%% Crashes with `badmatch' if Bin is too short to contain the entry.
get_string(N, O, Bin, Bo) when Bo =:= little; Bo =:= big ->
    EntryPos = O + 8 * N,
    <<_:EntryPos/binary, LenBytes:4/binary, PosBytes:4/binary, _/binary>> = Bin,
    Len = binary:decode_unsigned(LenBytes, Bo),
    StringO = binary:decode_unsigned(PosBytes, Bo),
    get_strings(StringO, Len, Bin).
%% Cut Len bytes out of Bin starting at byte offset StringO and split them on
%% NUL bytes, which separate the plural forms packed into a single entry.
%% Always returns at least one element (an empty slice yields [<<>>]).
%% Crashes with `badmatch' if the requested slice lies outside Bin.
get_strings(StringO, Len, Bin) ->
    <<_Skipped:StringO/binary, Packed:Len/binary, _Rest/binary>> = Bin,
    binary:split(Packed, <<0>>, [global]).
@seriyps
Copy link
Author

seriyps commented Oct 27, 2014

This module was later included in https://github.com/seriyps/gettexter

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment