{-# LANGUAGE OverloadedStrings #-} | |
module Main (main) where | |
import Control.Applicative ((<$>)) | |
import Data.List | |
import Data.Maybe (mapMaybe) | |
import Data.Word (Word) | |
import Data.Char (isSpace) | |
import Data.Bits | |
import Data.Ratio ((%)) | |
import Data.Monoid ((<>), mempty, mconcat) | |
import Data.Foldable (foldMap) | |
import qualified Data.ByteString.Char8 as BS | |
import Data.ByteString.Builder as B | |
import Numeric (showFFloat) | |
import System.IO (stderr, stdout, hPutStrLn) | |
-- | IP stored as 32 bits words are easier to compare and increment | |
-- Machine words (maybe 64 bits) are slightly faster (no narrowing) | |
type IP = Word | |
-- | Named ip range | |
data NamedIPR = NamedIPR | |
{-# UNPACK #-} !IP | |
{-# UNPACK #-} !IP | |
![BS.ByteString] | |
deriving (Eq, Ord) | |
main :: IO () | |
main = do | |
input <- mapMaybe processLine | |
. BS.lines | |
<$> BS.getContents | |
let merged = mergeIPR input | |
-- Stats | |
nInputRules = length input | |
nOutputRules = length merged | |
nIPs = toInteger . sum $ [1 + e - b | NamedIPR b e _ <- merged ] | |
percent :: Double | |
percent = fromRational $ 100 * nIPs % 2 ^ (32 :: Integer) | |
-- Helper | |
errPutStrLn = hPutStrLn stderr | |
-- Print stats | |
errPutStrLn $ shows nInputRules " rules read." | |
errPutStrLn $ showFFloat (Just 2) percent | |
. showString "% of IPv4 space is covered with " | |
. shows nOutputRules | |
. showString " ranges, for a total of " | |
. shows nIPs | |
$ " addresses." | |
-- Print merged list | |
B.hPutBuilder stdout . foldMap niprToB $ merged | |
processLine :: BS.ByteString -> Maybe NamedIPR | |
processLine l = | |
let | |
ignored = BS.null l || ('#' == BS.head l) | |
(name, iprBS) = case BS.split ':' l of | |
[n, r] -> (n, r) | |
xs -> (BS.concat $ init xs, last xs) | |
(start, end) | [a, o] <- BS.split '-' iprBS | |
= (readIP a, readIP o) | |
|otherwise = error $ "malformed range: " ++ show iprBS | |
readWord8 x | Just (i, b) <- BS.readInt x | |
, 0 <= i, i < 256 | |
, BS.null b | |
= fromInteger . toInteger $ i | |
| otherwise = error $ "malformed ip digit: " ++ show x | |
readIP ipbs | [a,b,c,d] <- map readWord8 . BS.split '.' $ ipbs | |
= d | |
.|. (c `unsafeShiftL` 8) | |
.|. (b `unsafeShiftL` 16) | |
.|. (a `unsafeShiftL` 24) | |
| otherwise = error $ "malformed IP: " ++ show ipbs | |
names = sort | |
. filter (not . BS.null) | |
. map (BS.dropWhile isSpace) | |
. BS.split ',' | |
$ name | |
in if ignored || (start > end) | |
then Nothing | |
else Just (NamedIPR start end names) | |
mergeIPR :: [NamedIPR] -> [NamedIPR] | |
-- mergeIPR = foldr (flip f) [] . sortBy (flip compare) | |
mergeIPR = foldl' f [] . sort | |
where | |
f cl@(x@(NamedIPR xb xe xn):xs) y@(NamedIPR yb ye yn) | |
| nextIP xe >= yb -- overlapping range | |
= NamedIPR xb ye (merge xn yn) : xs | |
| otherwise -- disjoint ranges | |
= y : cl | |
f [] c = [c] -- Initial case | |
nextIP ip | ip /= maxBound = ip + 1 | |
| otherwise = ip | |
niprToB :: NamedIPR -> Builder | |
niprToB (NamedIPR b e n) = name | |
-- wordDec (1 + e - b) | |
<> char7 ':' <> ipToB b | |
<> char7 '-' <> ipToB e | |
<> char7 '\n' | |
where | |
name = mconcat | |
. intersperse (B.shortByteString ", ") | |
. map B.byteString | |
$ n | |
ipToB w32 = mconcat | |
. intersperse (char7 '.') | |
. map wordDec | |
$ [ (w32 `unsafeShiftR` 24) .&. 255 | |
, (w32 `unsafeShiftR` 16) .&. 255 | |
, (w32 `unsafeShiftR` 8 ) .&. 255 | |
, w32 .&. 255 | |
] | |
merge :: Ord a => [a] -> [a] -> [a] | |
merge (x:xs) (y:ys) = case compare x y of | |
LT -> x : merge xs (y:ys) | |
GT -> y : merge (x:xs) ys | |
EQ -> x : merge xs ys | |
merge xs [] = xs | |
merge [] ys = ys |
This comment has been minimized.
This comment has been minimized.
I removed the gzip support because the support for concatenated .gz files was broken. You can pipe with zcat/gzip :
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
This happens when I run it against a gzipped file:
And this against a (pretty big) gunzipped file: