Created
May 8, 2014 20:54
-
-
Save jonathanpaulson/c62102541ae631c6688e to your computer and use it in GitHub Desktop.
Haskell Protocol Buffers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- | |
Implements binary encoding and decoding of protocol buffers | |
as specified in: | |
https://developers.google.com/protocol-buffers/docs/encoding | |
https://developers.google.com/protocol-buffers/docs/proto | |
TODO: | |
Handle [packed=true] | |
Parse .proto file | |
Define "nice" proto object spec | |
Produce nice proto object code from parsed .proto file | |
-} | |
import qualified Data.String.Utils as Utils | |
import Data.Bits | |
import Data.Int | |
import Data.Word | |
import Data.Char | |
import Data.List.Split | |
import Text.Printf | |
import Data.Map as M | |
import qualified Data.ByteString.Lazy as B | |
data Foo = Foo { bar :: Int32 } deriving Show | |
type RawPB = [(Int32, Value)] | |
data Value = | |
{- Wire type 0 -} | |
PBInt32 Int32 | PBInt64 Int64 | | |
PBUInt32 Word32 | PBUInt64 Word64 | | |
PBSInt32 Int32 | PBSInt64 Int64 | | |
PBBool Bool | PBEnum Int32 | | |
{- Wire type 1 -} | |
PBFixed64 Word64 | PBSFixed64 Int64 | {- Float64 -} | |
{- Wire type 2 -} | |
PBBytes [Word8] | PBString String| PBMessage RawPB | | |
{- Wire type 5 -} | |
PBFixed32 Word32 | PBSFixed32 Int32 {- Float32 -} | |
deriving Show | |
wireType :: Value -> Int32 | |
wireType (PBInt32 _) = 0 | |
wireType (PBUInt32 _) = 0 | |
wireType (PBSInt32 _) = 0 | |
wireType (PBInt64 _) = 0 | |
wireType (PBUInt64 _) = 0 | |
wireType (PBSInt64 _) = 0 | |
wireType (PBBool _) = 0 | |
wireType (PBEnum _) = 0 | |
wireType (PBFixed64 _) = 1 | |
wireType (PBSFixed64 _) = 1 | |
wireType (PBString _) = 2 | |
wireType (PBBytes _) = 2 | |
wireType (PBMessage _) = 2 | |
wireType (PBFixed32 _) = 5 | |
wireType (PBSFixed32 _) = 5 | |
{- ***** ENCODING ***** -} | |
encodePb :: RawPB -> [Word8] | |
encodePb pb = concat field_codes | |
where | |
field_codes = Prelude.map encode_field pb | |
header :: Int32 -> Value -> [Word8] | |
header k v = encodeVarint $ fromIntegral $ shiftL k 3 .|. fromIntegral (wireType v) | |
encode_field :: (Int32, Value) -> [Word8] | |
encode_field (k,v) = header k v ++ encodeValue v | |
encodeValue :: Value -> [Word8] | |
{- Wire type 0 -} | |
encodeValue (PBInt32 n) = encodeVarint $ fromIntegral n | |
encodeValue (PBUInt32 n) = encodeVarint $ fromIntegral n | |
encodeValue (PBSInt32 n) = encodeSigned $ fromIntegral n | |
encodeValue (PBInt64 n) = encodeVarint $ fromIntegral n | |
encodeValue (PBUInt64 n) = encodeVarint $ fromIntegral n | |
encodeValue (PBSInt64 n) = encodeSigned $ fromIntegral n | |
encodeValue (PBBool True) = [1] | |
encodeValue (PBBool False) = [0] | |
encodeValue (PBEnum n) = encodeVarint $ fromIntegral n | |
{- Wire type 1 -} | |
encodeValue (PBFixed64 n) = encodeFixed 8 (fromIntegral n) | |
encodeValue (PBSFixed64 n) = encodeFixed 8 (fromIntegral n) | |
{- Wire type 2 -} | |
encodeValue (PBBytes bs) = encodeLength bs ++ bs | |
encodeValue (PBString str) = encodeLength str ++ Prelude.map char_to_byte str | |
where | |
char_to_byte ch = fromIntegral $ ord ch | |
encodeValue (PBMessage msg) = encodeLength bs ++ bs | |
where | |
bs = encodePb msg | |
{- Wire type 5 -} | |
encodeValue (PBFixed32 n) = encodeFixed 4 (fromIntegral n) | |
encodeValue (PBSFixed32 n) = encodeFixed 4 (fromIntegral n) | |
encodeSigned :: Integer -> [Word8] | |
encodeSigned n | n >= 0 = encodeVarint $ fromIntegral $ shiftL n 1 | |
encodeSigned n | n < 0 = encodeVarint $ fromIntegral $ 1 + shiftL (abs n) 1 | |
encodeLength :: [a] -> [Word8] | |
encodeLength xs = encodeVarint $ fromIntegral $ length xs | |
encodeVarint :: Integer -> [Word8] | |
encodeVarint n | n < 0 = encodeVarint (2^70 + n) | |
encodeVarint n | n < 128 = [fromIntegral n] | |
encodeVarint n = (first_byte .|. bit 7) : encodeVarint (n `div` 128) | |
where | |
first_byte = fromIntegral $ n `mod` 128 | |
encodeFixed :: Integer -> Integer -> [Word8] | |
encodeFixed 0 _ = [] | |
encodeFixed i n = fromIntegral (n `mod` 256) : encodeFixed (i-1) (n `div` 256) | |
{- ***** DECODING ***** -} | |
{- TODO(paulson): There should probably be a monad threading the [Word8] around -} | |
decodePb :: [Word8] -> RawPB | |
decodePb [] = [] | |
decodePb b1 = (tag, v) : decodePb b3 | |
where | |
(header, b2) = decodeVarint b1 | |
(tag, wireType) = decode_header $ fromIntegral header | |
(v, b3) = decode_value wireType b2 | |
decode_header :: Int32 -> (Int32, Int32) | |
decode_header n = (shiftR n 3, n .&. 0x7) | |
decode_value :: Int32 -> [Word8] -> (Value, [Word8]) | |
decode_value 0 b1 = (PBUInt64 $ fromIntegral v, b2) | |
where | |
(v, b2) = decodeVarint b1 | |
decode_value 1 b1 = (PBFixed64 $ fromIntegral v, b2) | |
where | |
(v, b2) = decodeFixed 8 b1 | |
decode_value 2 b1 = (PBBytes v, b3) | |
where | |
(len, b2) = decodeVarint b1 | |
(v, b3) = (take (fromIntegral len) b2, drop (fromIntegral len) b2) | |
decode_value 5 b1 = (PBFixed32 $ fromIntegral v, b2) | |
where | |
(v, b2) = decodeFixed 4 b1 | |
decodeFixed :: Integer -> [Word8] -> (Integer, [Word8]) | |
decodeFixed len b1 = (bytes_to_integer b2, b3) | |
where | |
(b2, b3) = (take (fromIntegral len) b1, drop (fromIntegral len) b1) | |
bytes_to_integer :: [Word8] -> Integer | |
bytes_to_integer [] = 0 | |
bytes_to_integer (b:bs) = fromIntegral b + 256 * bytes_to_integer bs | |
decodeVarint :: [Word8] -> (Integer, [Word8]) | |
decodeVarint stream = f stream (0::Integer) (1::Integer) | |
where | |
done :: Word8 | |
done = 0x80 {- 1000 1000 -} | |
mask :: Word8 | |
mask = 0x7f {- 0111 1111 -} | |
final :: Word8 -> Bool | |
final byte = (byte .&. done) == 0 | |
val :: Word8 -> Word8 | |
val byte = byte .&. mask | |
f :: [Word8] -> Integer -> Integer -> (Integer, [Word8]) | |
f (b:bs) n p128 | |
| final b = (newn n b p128, bs) | |
| otherwise = f bs (newn n b p128) (p128*128) | |
where | |
newn n byte p128 = n + fromIntegral (val b) * p128 | |
{- *** DEBUGGING *** -} | |
printBytes :: [Word8] -> IO () | |
printBytes bs = putStrLn $ Utils.join " " $ chunksOf 4 str | |
where | |
str = concat strs | |
strs = Prelude.map to_binary bs | |
to_binary n = Prelude.map bit_to_string (to_bits n) | |
to_bits :: Bits a => a -> [Bool] | |
to_bits n = f n (bitSize n - 1) | |
where | |
f n (-1) = [] | |
f n i = testBit n i : f n (i-1) | |
bit_to_string :: Bool -> Char | |
bit_to_string True = '1' | |
bit_to_string False = '0' | |
test1 = [(1 :: Int32, PBInt32 (150 :: Int32))] | |
test2 = [(1 :: Int32, PBInt64 (150 :: Int64))] | |
test3 = [(1 :: Int32, PBFixed64 (150 :: Word64))] | |
test4 = [(1 :: Int32, PBBytes ([150] :: [Word8]))] | |
test5 = [(1 :: Int32, PBFixed32 150)] | |
test6 = [(1 :: Int32, PBString "testing")] | |
test7 = [(3 :: Int32, PBMessage [(1, PBInt32 150)])] | |
test8 = [(3 :: Int32, PBMessage [(1 :: Int32, PBInt32 (-1 :: Int32))])] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment