Piezoid/fuseblkl.hs

## fuseblkl.hs
{-# LANGUAGE OverloadedStrings #-}

module Main (main) where

import Control.Applicative ((<$>))

import Data.List
import Data.Maybe (mapMaybe)
import Data.Word (Word)
import Data.Char (isSpace)
import Data.Bits
import Data.Ratio ((%))
import Data.Monoid ((<>), mempty, mconcat)
import Data.Foldable (foldMap)

import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Builder as B

import Numeric (showFFloat)
import System.IO (stderr, stdout, hPutStrLn)


-- | IP stored as 32 bits words are easier to compare and increment
-- Machine words (maybe 64 bits) are slightly faster (no narrowing)
type IP = Word

-- | Named ip range
data NamedIPR = NamedIPR
                  {-# UNPACK #-} !IP
                  {-# UNPACK #-} !IP
                                 ![BS.ByteString]
  deriving (Eq, Ord)


main :: IO ()
main = do
  input <- mapMaybe processLine
         . BS.lines
       <$> BS.getContents

  let merged = mergeIPR input

      -- Stats
      nInputRules = length input
      nOutputRules = length merged

      nIPs = toInteger . sum $ [1 + e - b | NamedIPR b e _ <- merged ]

      percent :: Double
      percent = fromRational $ 100 * nIPs % 2 ^ (32 :: Integer)

      -- Helper
      errPutStrLn = hPutStrLn stderr

  -- Print stats
  errPutStrLn $ shows nInputRules " rules read."
  errPutStrLn $ showFFloat (Just 2) percent
              . showString "% of IPv4 space is covered with "
              . shows nOutputRules
              . showString " ranges, for a total of "
              . shows nIPs
              $ " addresses."

  -- Print merged list
  B.hPutBuilder stdout . foldMap niprToB $ merged


processLine :: BS.ByteString -> Maybe NamedIPR
processLine l =
  let
    ignored = BS.null l || ('#' == BS.head l)

    (name, iprBS) = case BS.split ':' l of
                       [n, r] -> (n, r)
                       xs -> (BS.concat $ init xs, last xs)

    (start, end) | [a, o] <- BS.split '-' iprBS
                 = (readIP a, readIP o)
                 |otherwise = error $ "malformed range: " ++ show iprBS

    readWord8 x | Just (i, b) <- BS.readInt x
                , 0 <= i, i < 256
                , BS.null b
                = fromInteger . toInteger $ i
                | otherwise = error $ "malformed ip digit: " ++ show x

    readIP ipbs | [a,b,c,d] <- map readWord8 . BS.split '.' $ ipbs
                =     d
                 .|. (c `unsafeShiftL` 8)
                 .|. (b `unsafeShiftL` 16)
                 .|. (a `unsafeShiftL` 24)
                | otherwise = error $ "malformed IP: " ++ show ipbs

    names = sort
          . filter (not . BS.null)
          . map (BS.dropWhile isSpace)
          . BS.split ','
          $ name

  in if ignored || (start > end)
      then Nothing
      else Just (NamedIPR start end names)


mergeIPR :: [NamedIPR] -> [NamedIPR]
-- mergeIPR = foldr (flip f) [] . sortBy (flip compare)
mergeIPR = foldl' f [] . sort
  where
    f cl@(x@(NamedIPR xb xe xn):xs) y@(NamedIPR yb ye yn)
      | nextIP xe >= yb -- overlapping range
      = NamedIPR xb ye (merge xn yn) : xs
      | otherwise -- disjoint ranges
      = y : cl
    f [] c = [c] -- Initial case

    nextIP ip | ip /= maxBound = ip + 1
              | otherwise = ip

niprToB :: NamedIPR -> Builder
niprToB (NamedIPR b e n) =  name
--                   wordDec (1 + e - b)
                 <> char7 ':' <> ipToB b
                 <> char7 '-' <> ipToB e
                 <> char7 '\n'
  where
    name = mconcat
         . intersperse (B.shortByteString ", ")
         . map B.byteString
         $ n

    ipToB w32 = mconcat
              . intersperse (char7 '.')
              . map wordDec
              $ [ (w32 `unsafeShiftR` 24) .&. 255
                , (w32 `unsafeShiftR` 16) .&. 255
                , (w32 `unsafeShiftR` 8 ) .&. 255
                ,  w32 .&. 255
                ]

merge :: Ord a => [a] -> [a] -> [a]
merge (x:xs) (y:ys) = case compare x y of
                           LT -> x : merge xs (y:ys)
                           GT -> y : merge (x:xs) ys
                           EQ -> x : merge xs ys
merge xs [] = xs
merge [] ys = ys
	{-# LANGUAGE OverloadedStrings #-}

	module Main (main) where

	import Control.Applicative ((<$>))

	import Data.List
	import Data.Maybe (mapMaybe)
	import Data.Word (Word)
	import Data.Char (isSpace)
	import Data.Bits
	import Data.Ratio ((%))
	import Data.Monoid ((<>), mempty, mconcat)
	import Data.Foldable (foldMap)

	import qualified Data.ByteString.Char8 as BS
	import Data.ByteString.Builder as B

	import Numeric (showFFloat)
	import System.IO (stderr, stdout, hPutStrLn)



	-- \| IP stored as 32 bits words are easier to compare and increment
	-- Machine words (maybe 64 bits) are slightly faster (no narrowing)
	type IP = Word

	-- \| Named ip range
	data NamedIPR = NamedIPR
	{-# UNPACK #-} !IP
	{-# UNPACK #-} !IP
	![BS.ByteString]
	deriving (Eq, Ord)


	main :: IO ()
	main = do
	input <- mapMaybe processLine
	. BS.lines
	<$> BS.getContents

	let merged = mergeIPR input

	-- Stats
	nInputRules = length input
	nOutputRules = length merged

	nIPs = toInteger . sum $ [1 + e - b \| NamedIPR b e _ <- merged ]

	percent :: Double
	percent = fromRational $ 100 * nIPs % 2 ^ (32 :: Integer)

	-- Helper
	errPutStrLn = hPutStrLn stderr

	-- Print stats
	errPutStrLn $ shows nInputRules " rules read."
	errPutStrLn $ showFFloat (Just 2) percent
	. showString "% of IPv4 space is covered with "
	. shows nOutputRules
	. showString " ranges, for a total of "
	. shows nIPs
	$ " addresses."

	-- Print merged list
	B.hPutBuilder stdout . foldMap niprToB $ merged


	processLine :: BS.ByteString -> Maybe NamedIPR
	processLine l =
	let
	ignored = BS.null l \|\| ('#' == BS.head l)

	(name, iprBS) = case BS.split ':' l of
	[n, r] -> (n, r)
	xs -> (BS.concat $ init xs, last xs)

	(start, end) \| [a, o] <- BS.split '-' iprBS
	= (readIP a, readIP o)
	\|otherwise = error $ "malformed range: " ++ show iprBS

	readWord8 x \| Just (i, b) <- BS.readInt x
	, 0 <= i, i < 256
	, BS.null b
	= fromInteger . toInteger $ i
	\| otherwise = error $ "malformed ip digit: " ++ show x

	readIP ipbs \| [a,b,c,d] <- map readWord8 . BS.split '.' $ ipbs
	= d
	.\|. (c `unsafeShiftL` 8)
	.\|. (b `unsafeShiftL` 16)
	.\|. (a `unsafeShiftL` 24)
	\| otherwise = error $ "malformed IP: " ++ show ipbs

	names = sort
	. filter (not . BS.null)
	. map (BS.dropWhile isSpace)
	. BS.split ','
	$ name

	in if ignored \|\| (start > end)
	then Nothing
	else Just (NamedIPR start end names)


	mergeIPR :: [NamedIPR] -> [NamedIPR]
	-- mergeIPR = foldr (flip f) [] . sortBy (flip compare)
	mergeIPR = foldl' f [] . sort
	where
	f cl@(x@(NamedIPR xb xe xn):xs) y@(NamedIPR yb ye yn)
	\| nextIP xe >= yb -- overlapping range
	= NamedIPR xb ye (merge xn yn) : xs
	\| otherwise -- disjoint ranges
	= y : cl
	f [] c = [c] -- Initial case

	nextIP ip \| ip /= maxBound = ip + 1
	\| otherwise = ip

	niprToB :: NamedIPR -> Builder
	niprToB (NamedIPR b e n) = name
	-- wordDec (1 + e - b)
	<> char7 ':' <> ipToB b
	<> char7 '-' <> ipToB e
	<> char7 '\n'
	where
	name = mconcat
	. intersperse (B.shortByteString ", ")
	. map B.byteString
	$ n

	ipToB w32 = mconcat
	. intersperse (char7 '.')
	. map wordDec
	$ [ (w32 `unsafeShiftR` 24) .&. 255
	, (w32 `unsafeShiftR` 16) .&. 255
	, (w32 `unsafeShiftR` 8 ) .&. 255
	, w32 .&. 255
	]

	merge :: Ord a => [a] -> [a] -> [a]
	merge (x:xs) (y:ys) = case compare x y of
	LT -> x : merge xs (y:ys)
	GT -> y : merge (x:xs) ys
	EQ -> x : merge xs ys
	merge xs [] = xs
	merge [] ys = ys