Skip to content

Instantly share code, notes, and snippets.

@Piezoid
Last active May 7, 2019 13:48
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Piezoid/ee43be6e5eebd6aa9bac to your computer and use it in GitHub Desktop.
Save Piezoid/ee43be6e5eebd6aa9bac to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Control.Applicative ((<$>))
import Data.List
import Data.Maybe (mapMaybe)
import Data.Word (Word)
import Data.Char (isSpace)
import Data.Bits
import Data.Ratio ((%))
import Data.Monoid ((<>), mempty, mconcat)
import Data.Foldable (foldMap)
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Builder as B
import Numeric (showFFloat)
import System.IO (stderr, stdout, hPutStrLn)
-- | IP stored as 32 bits words are easier to compare and increment
-- Machine words (maybe 64 bits) are slightly faster (no narrowing)
type IP = Word
-- | Named ip range
data NamedIPR = NamedIPR
{-# UNPACK #-} !IP
{-# UNPACK #-} !IP
![BS.ByteString]
deriving (Eq, Ord)
main :: IO ()
main = do
input <- mapMaybe processLine
. BS.lines
<$> BS.getContents
let merged = mergeIPR input
-- Stats
nInputRules = length input
nOutputRules = length merged
nIPs = toInteger . sum $ [1 + e - b | NamedIPR b e _ <- merged ]
percent :: Double
percent = fromRational $ 100 * nIPs % 2 ^ (32 :: Integer)
-- Helper
errPutStrLn = hPutStrLn stderr
-- Print stats
errPutStrLn $ shows nInputRules " rules read."
errPutStrLn $ showFFloat (Just 2) percent
. showString "% of IPv4 space is covered with "
. shows nOutputRules
. showString " ranges, for a total of "
. shows nIPs
$ " addresses."
-- Print merged list
B.hPutBuilder stdout . foldMap niprToB $ merged
processLine :: BS.ByteString -> Maybe NamedIPR
processLine l =
let
ignored = BS.null l || ('#' == BS.head l)
(name, iprBS) = case BS.split ':' l of
[n, r] -> (n, r)
xs -> (BS.concat $ init xs, last xs)
(start, end) | [a, o] <- BS.split '-' iprBS
= (readIP a, readIP o)
|otherwise = error $ "malformed range: " ++ show iprBS
readWord8 x | Just (i, b) <- BS.readInt x
, 0 <= i, i < 256
, BS.null b
= fromInteger . toInteger $ i
| otherwise = error $ "malformed ip digit: " ++ show x
readIP ipbs | [a,b,c,d] <- map readWord8 . BS.split '.' $ ipbs
= d
.|. (c `unsafeShiftL` 8)
.|. (b `unsafeShiftL` 16)
.|. (a `unsafeShiftL` 24)
| otherwise = error $ "malformed IP: " ++ show ipbs
names = sort
. filter (not . BS.null)
. map (BS.dropWhile isSpace)
. BS.split ','
$ name
in if ignored || (start > end)
then Nothing
else Just (NamedIPR start end names)
mergeIPR :: [NamedIPR] -> [NamedIPR]
-- mergeIPR = foldr (flip f) [] . sortBy (flip compare)
mergeIPR = foldl' f [] . sort
where
f cl@(x@(NamedIPR xb xe xn):xs) y@(NamedIPR yb ye yn)
| nextIP xe >= yb -- overlapping range
= NamedIPR xb ye (merge xn yn) : xs
| otherwise -- disjoint ranges
= y : cl
f [] c = [c] -- Initial case
nextIP ip | ip /= maxBound = ip + 1
| otherwise = ip
niprToB :: NamedIPR -> Builder
niprToB (NamedIPR b e n) = name
-- wordDec (1 + e - b)
<> char7 ':' <> ipToB b
<> char7 '-' <> ipToB e
<> char7 '\n'
where
name = mconcat
. intersperse (B.shortByteString ", ")
. map B.byteString
$ n
ipToB w32 = mconcat
. intersperse (char7 '.')
. map wordDec
$ [ (w32 `unsafeShiftR` 24) .&. 255
, (w32 `unsafeShiftR` 16) .&. 255
, (w32 `unsafeShiftR` 8 ) .&. 255
, w32 .&. 255
]
merge :: Ord a => [a] -> [a] -> [a]
merge (x:xs) (y:ys) = case compare x y of
LT -> x : merge xs (y:ys)
GT -> y : merge (x:xs) ys
EQ -> x : merge xs ys
merge xs [] = xs
merge [] ys = ys
@rxw1
Copy link

rxw1 commented Jul 28, 2014

This happens when I run it against a gzipped file:

165089 master ~src/ee43be6e5eebd6aa9bac : ./fuseblkl < /tmp/blocklists-2014-26-29/bcoepfyewziejvcqyhqo.gz
fuseblkl: malformed range: "\242@\174#\230\128\165#\246\128\165#\ACK\129\165\163\&6\144^\132\179\b,\GS\177\b,\GSY \211\DC1\139\192\210\DC1\139\192\210\DC1\139  z\STX\255a\187\DLE\182\ACK\NUL\NUL"

And this against a (pretty big) gunzipped file:

165094 master ~src/ee43be6e5eebd6aa9bac : ./fuseblkl < /tmp/blocklist
904927 rules read.
Stack space overflow: current size 8388608 bytes.
Use `+RTS -Ksize -RTS' to increase it.

@Piezoid
Copy link
Author

Piezoid commented Sep 13, 2014

I removed the gzip support because the support for concatenated .gz files was broken. You can pipe with zcat/gzip :

zcat *.gz | ./fuseblkl | gzip > fused.gz

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment