Last active
May 7, 2019 13:48
-
-
Save Piezoid/ee43be6e5eebd6aa9bac to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module Main (main) where | |
import Control.Applicative ((<$>)) | |
import Data.List | |
import Data.Maybe (mapMaybe) | |
import Data.Word (Word) | |
import Data.Char (isSpace) | |
import Data.Bits | |
import Data.Ratio ((%)) | |
import Data.Monoid ((<>), mempty, mconcat) | |
import Data.Foldable (foldMap) | |
import qualified Data.ByteString.Char8 as BS | |
import Data.ByteString.Builder as B | |
import Numeric (showFFloat) | |
import System.IO (stderr, stdout, hPutStrLn) | |
-- | IP stored as 32 bits words are easier to compare and increment | |
-- Machine words (maybe 64 bits) are slightly faster (no narrowing) | |
type IP = Word | |
-- | Named ip range | |
data NamedIPR = NamedIPR | |
{-# UNPACK #-} !IP | |
{-# UNPACK #-} !IP | |
![BS.ByteString] | |
deriving (Eq, Ord) | |
main :: IO () | |
main = do | |
input <- mapMaybe processLine | |
. BS.lines | |
<$> BS.getContents | |
let merged = mergeIPR input | |
-- Stats | |
nInputRules = length input | |
nOutputRules = length merged | |
nIPs = toInteger . sum $ [1 + e - b | NamedIPR b e _ <- merged ] | |
percent :: Double | |
percent = fromRational $ 100 * nIPs % 2 ^ (32 :: Integer) | |
-- Helper | |
errPutStrLn = hPutStrLn stderr | |
-- Print stats | |
errPutStrLn $ shows nInputRules " rules read." | |
errPutStrLn $ showFFloat (Just 2) percent | |
. showString "% of IPv4 space is covered with " | |
. shows nOutputRules | |
. showString " ranges, for a total of " | |
. shows nIPs | |
$ " addresses." | |
-- Print merged list | |
B.hPutBuilder stdout . foldMap niprToB $ merged | |
processLine :: BS.ByteString -> Maybe NamedIPR | |
processLine l = | |
let | |
ignored = BS.null l || ('#' == BS.head l) | |
(name, iprBS) = case BS.split ':' l of | |
[n, r] -> (n, r) | |
xs -> (BS.concat $ init xs, last xs) | |
(start, end) | [a, o] <- BS.split '-' iprBS | |
= (readIP a, readIP o) | |
|otherwise = error $ "malformed range: " ++ show iprBS | |
readWord8 x | Just (i, b) <- BS.readInt x | |
, 0 <= i, i < 256 | |
, BS.null b | |
= fromInteger . toInteger $ i | |
| otherwise = error $ "malformed ip digit: " ++ show x | |
readIP ipbs | [a,b,c,d] <- map readWord8 . BS.split '.' $ ipbs | |
= d | |
.|. (c `unsafeShiftL` 8) | |
.|. (b `unsafeShiftL` 16) | |
.|. (a `unsafeShiftL` 24) | |
| otherwise = error $ "malformed IP: " ++ show ipbs | |
names = sort | |
. filter (not . BS.null) | |
. map (BS.dropWhile isSpace) | |
. BS.split ',' | |
$ name | |
in if ignored || (start > end) | |
then Nothing | |
else Just (NamedIPR start end names) | |
mergeIPR :: [NamedIPR] -> [NamedIPR] | |
-- mergeIPR = foldr (flip f) [] . sortBy (flip compare) | |
mergeIPR = foldl' f [] . sort | |
where | |
f cl@(x@(NamedIPR xb xe xn):xs) y@(NamedIPR yb ye yn) | |
| nextIP xe >= yb -- overlapping range | |
= NamedIPR xb ye (merge xn yn) : xs | |
| otherwise -- disjoint ranges | |
= y : cl | |
f [] c = [c] -- Initial case | |
nextIP ip | ip /= maxBound = ip + 1 | |
| otherwise = ip | |
niprToB :: NamedIPR -> Builder | |
niprToB (NamedIPR b e n) = name | |
-- wordDec (1 + e - b) | |
<> char7 ':' <> ipToB b | |
<> char7 '-' <> ipToB e | |
<> char7 '\n' | |
where | |
name = mconcat | |
. intersperse (B.shortByteString ", ") | |
. map B.byteString | |
$ n | |
ipToB w32 = mconcat | |
. intersperse (char7 '.') | |
. map wordDec | |
$ [ (w32 `unsafeShiftR` 24) .&. 255 | |
, (w32 `unsafeShiftR` 16) .&. 255 | |
, (w32 `unsafeShiftR` 8 ) .&. 255 | |
, w32 .&. 255 | |
] | |
merge :: Ord a => [a] -> [a] -> [a] | |
merge (x:xs) (y:ys) = case compare x y of | |
LT -> x : merge xs (y:ys) | |
GT -> y : merge (x:xs) ys | |
EQ -> x : merge xs ys | |
merge xs [] = xs | |
merge [] ys = ys |
I removed the gzip support because the support for concatenated .gz files was broken. You can pipe with zcat/gzip :
zcat *.gz | ./fuseblkl | gzip > fused.gz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This happens when I run it against a gzipped file:
And this against a (pretty big) gunzipped file: