Skip to content

Instantly share code, notes, and snippets.

@jonathanpaulson
Created May 8, 2014 20:54
Show Gist options
  • Save jonathanpaulson/c62102541ae631c6688e to your computer and use it in GitHub Desktop.
Save jonathanpaulson/c62102541ae631c6688e to your computer and use it in GitHub Desktop.
Haskell Protocol Buffers
{-
Implements binary encoding and decoding of protocol buffers
as specified in:
https://developers.google.com/protocol-buffers/docs/encoding
https://developers.google.com/protocol-buffers/docs/proto
TODO:
Handle [packed=true]
Parse .proto file
Define "nice" proto object spec
Produce nice proto object code from parsed .proto file
-}
import qualified Data.String.Utils as Utils
import Data.Bits
import Data.Int
import Data.Word
import Data.Char
import Data.List.Split
import Text.Printf
import Data.Map as M
import qualified Data.ByteString.Lazy as B
data Foo = Foo { bar :: Int32 } deriving Show
type RawPB = [(Int32, Value)]
data Value =
{- Wire type 0 -}
PBInt32 Int32 | PBInt64 Int64 |
PBUInt32 Word32 | PBUInt64 Word64 |
PBSInt32 Int32 | PBSInt64 Int64 |
PBBool Bool | PBEnum Int32 |
{- Wire type 1 -}
PBFixed64 Word64 | PBSFixed64 Int64 | {- Float64 -}
{- Wire type 2 -}
PBBytes [Word8] | PBString String| PBMessage RawPB |
{- Wire type 5 -}
PBFixed32 Word32 | PBSFixed32 Int32 {- Float32 -}
deriving Show
wireType :: Value -> Int32
wireType (PBInt32 _) = 0
wireType (PBUInt32 _) = 0
wireType (PBSInt32 _) = 0
wireType (PBInt64 _) = 0
wireType (PBUInt64 _) = 0
wireType (PBSInt64 _) = 0
wireType (PBBool _) = 0
wireType (PBEnum _) = 0
wireType (PBFixed64 _) = 1
wireType (PBSFixed64 _) = 1
wireType (PBString _) = 2
wireType (PBBytes _) = 2
wireType (PBMessage _) = 2
wireType (PBFixed32 _) = 5
wireType (PBSFixed32 _) = 5
{- ***** ENCODING ***** -}
encodePb :: RawPB -> [Word8]
encodePb pb = concat field_codes
where
field_codes = Prelude.map encode_field pb
header :: Int32 -> Value -> [Word8]
header k v = encodeVarint $ fromIntegral $ shiftL k 3 .|. fromIntegral (wireType v)
encode_field :: (Int32, Value) -> [Word8]
encode_field (k,v) = header k v ++ encodeValue v
encodeValue :: Value -> [Word8]
{- Wire type 0 -}
encodeValue (PBInt32 n) = encodeVarint $ fromIntegral n
encodeValue (PBUInt32 n) = encodeVarint $ fromIntegral n
encodeValue (PBSInt32 n) = encodeSigned $ fromIntegral n
encodeValue (PBInt64 n) = encodeVarint $ fromIntegral n
encodeValue (PBUInt64 n) = encodeVarint $ fromIntegral n
encodeValue (PBSInt64 n) = encodeSigned $ fromIntegral n
encodeValue (PBBool True) = [1]
encodeValue (PBBool False) = [0]
encodeValue (PBEnum n) = encodeVarint $ fromIntegral n
{- Wire type 1 -}
encodeValue (PBFixed64 n) = encodeFixed 8 (fromIntegral n)
encodeValue (PBSFixed64 n) = encodeFixed 8 (fromIntegral n)
{- Wire type 2 -}
encodeValue (PBBytes bs) = encodeLength bs ++ bs
encodeValue (PBString str) = encodeLength str ++ Prelude.map char_to_byte str
where
char_to_byte ch = fromIntegral $ ord ch
encodeValue (PBMessage msg) = encodeLength bs ++ bs
where
bs = encodePb msg
{- Wire type 5 -}
encodeValue (PBFixed32 n) = encodeFixed 4 (fromIntegral n)
encodeValue (PBSFixed32 n) = encodeFixed 4 (fromIntegral n)
encodeSigned :: Integer -> [Word8]
encodeSigned n | n >= 0 = encodeVarint $ fromIntegral $ shiftL n 1
encodeSigned n | n < 0 = encodeVarint $ fromIntegral $ 1 + shiftL (abs n) 1
encodeLength :: [a] -> [Word8]
encodeLength xs = encodeVarint $ fromIntegral $ length xs
encodeVarint :: Integer -> [Word8]
encodeVarint n | n < 0 = encodeVarint (2^70 + n)
encodeVarint n | n < 128 = [fromIntegral n]
encodeVarint n = (first_byte .|. bit 7) : encodeVarint (n `div` 128)
where
first_byte = fromIntegral $ n `mod` 128
encodeFixed :: Integer -> Integer -> [Word8]
encodeFixed 0 _ = []
encodeFixed i n = fromIntegral (n `mod` 256) : encodeFixed (i-1) (n `div` 256)
{- ***** DECODING ***** -}
{- TODO(paulson): There should probably be a monad threading the [Word8] around -}
decodePb :: [Word8] -> RawPB
decodePb [] = []
decodePb b1 = (tag, v) : decodePb b3
where
(header, b2) = decodeVarint b1
(tag, wireType) = decode_header $ fromIntegral header
(v, b3) = decode_value wireType b2
decode_header :: Int32 -> (Int32, Int32)
decode_header n = (shiftR n 3, n .&. 0x7)
decode_value :: Int32 -> [Word8] -> (Value, [Word8])
decode_value 0 b1 = (PBUInt64 $ fromIntegral v, b2)
where
(v, b2) = decodeVarint b1
decode_value 1 b1 = (PBFixed64 $ fromIntegral v, b2)
where
(v, b2) = decodeFixed 8 b1
decode_value 2 b1 = (PBBytes v, b3)
where
(len, b2) = decodeVarint b1
(v, b3) = (take (fromIntegral len) b2, drop (fromIntegral len) b2)
decode_value 5 b1 = (PBFixed32 $ fromIntegral v, b2)
where
(v, b2) = decodeFixed 4 b1
decodeFixed :: Integer -> [Word8] -> (Integer, [Word8])
decodeFixed len b1 = (bytes_to_integer b2, b3)
where
(b2, b3) = (take (fromIntegral len) b1, drop (fromIntegral len) b1)
bytes_to_integer :: [Word8] -> Integer
bytes_to_integer [] = 0
bytes_to_integer (b:bs) = fromIntegral b + 256 * bytes_to_integer bs
decodeVarint :: [Word8] -> (Integer, [Word8])
decodeVarint stream = f stream (0::Integer) (1::Integer)
where
done :: Word8
done = 0x80 {- 1000 1000 -}
mask :: Word8
mask = 0x7f {- 0111 1111 -}
final :: Word8 -> Bool
final byte = (byte .&. done) == 0
val :: Word8 -> Word8
val byte = byte .&. mask
f :: [Word8] -> Integer -> Integer -> (Integer, [Word8])
f (b:bs) n p128
| final b = (newn n b p128, bs)
| otherwise = f bs (newn n b p128) (p128*128)
where
newn n byte p128 = n + fromIntegral (val b) * p128
{- *** DEBUGGING *** -}
printBytes :: [Word8] -> IO ()
printBytes bs = putStrLn $ Utils.join " " $ chunksOf 4 str
where
str = concat strs
strs = Prelude.map to_binary bs
to_binary n = Prelude.map bit_to_string (to_bits n)
to_bits :: Bits a => a -> [Bool]
to_bits n = f n (bitSize n - 1)
where
f n (-1) = []
f n i = testBit n i : f n (i-1)
bit_to_string :: Bool -> Char
bit_to_string True = '1'
bit_to_string False = '0'
test1 = [(1 :: Int32, PBInt32 (150 :: Int32))]
test2 = [(1 :: Int32, PBInt64 (150 :: Int64))]
test3 = [(1 :: Int32, PBFixed64 (150 :: Word64))]
test4 = [(1 :: Int32, PBBytes ([150] :: [Word8]))]
test5 = [(1 :: Int32, PBFixed32 150)]
test6 = [(1 :: Int32, PBString "testing")]
test7 = [(3 :: Int32, PBMessage [(1, PBInt32 150)])]
test8 = [(3 :: Int32, PBMessage [(1 :: Int32, PBInt32 (-1 :: Int32))])]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment