Skip to content

Instantly share code, notes, and snippets.

@songpp
Created January 18, 2011 10:11
Show Gist options
  • Save songpp/784254 to your computer and use it in GitHub Desktop.
Save songpp/784254 to your computer and use it in GitHub Desktop.
Parse IP data with Haskell (incomplete)
module IP where
import Control.Exception (bracket)
import Control.Monad (replicateM)
import Data.Bits
import Data.Char
import Data.Int
import Data.Word
import Numeric
import System.IO
--import Text.Regex

import Data.Binary.Get
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.UTF8 as UTF8
-- Path of the qqwry.dat IP database file. The commented-out line below is an
-- alternative location on another drive.
--dataFile = "e:/Program Files/cz88.net/ip/qqwry.dat"
dataFile = "F:/Program Files/cz88.net/ip/qqwry.dat"
-- Index position of the first IP record.
firstIpOffset = 5712056
-- Index position of the last IP record.
lastIpOffset = 8576351
-- Total number of IP records.
-- For the values above this equals (lastIpOffset - firstIpOffset) / 7.
-- NOTE(review): whether the record at lastIpOffset itself should be counted
-- (which would add one) is not established here -- confirm.
totalRecord = 409185
-- | Decode the 8-byte file header.
--
-- The first four bytes give the index position of the first IP record and
-- the last four bytes give the index position of the last IP record. Both
-- are read as little-endian 32-bit words and widened to 'Integer'.
readHeader :: BL.ByteString -> (Integer, Integer)
readHeader header = (toInteger firstIdx, toInteger lastIdx)
  where
    -- Bound lazily: the parser only runs when a component is demanded.
    (firstIdx, lastIdx) = runGet headerParser header
    headerParser = do
      lo <- getWord32le
      hi <- getWord32le
      return (lo, hi)
-- | Open the IP database at 'dataFile' for reading.
--
-- The database is raw binary data, so the handle is opened in binary mode:
-- no text encoding or newline translation can apply to it, whichever read
-- function is later used on the handle.
--
-- The caller owns the returned handle and is responsible for closing it.
dataFileHandle :: IO Handle
dataFileHandle = openBinaryFile dataFile ReadMode
-- | Return the first byte of the input.
--
-- Running the decoder on an empty input fails, as with any 'runGet' that
-- needs more bytes than are available.
readOneByte :: BL.ByteString -> Word8
readOneByte input = runGet getWord8 input
-- | Read the first @n@ bytes of the input and return them in reverse order,
-- so a little-endian field comes back with its most significant byte first.
readBytes :: Int -> BL.ByteString -> [Word8]
readBytes n input = reverse (runGet (replicateM n getWord8) input)
-- | Split a 'Word32' into its four bytes, most significant byte first
-- (bits 31-24, then 23-16, then 15-8, then 7-0).
intToBytes :: Word32 -> [Word8]
intToBytes w = map byteAt [24, 16, 8, 0]
  where
    -- Shift the wanted byte into the low 8 bits, then truncate to Word8.
    byteAt :: Int -> Word8
    byteAt bits = fromIntegral (w `shiftR` bits)
-- | Test helper: read @howMany@ raw bytes of the region data referenced by
-- the 7-byte index entry located at @offset@.
--
-- Example offset used for testing: 5712023.
--
-- The database handle is opened for the duration of the call and is always
-- closed again, even if seeking or decoding throws.
readRegion :: Integer -> Int -> IO [Word8]
readRegion offset howMany =
  bracket dataFileHandle hClose $ \h -> do
    (_, regionBytes) <- readIt h offset
    -- 'readIt' returns the region offset as four bytes, most significant
    -- first, so it is decoded as a big-endian word.
    let regionOffset = runGet getWord32be (BL.pack regionBytes)
    -- +4 because the 4 bytes being skipped are the end address of this
    -- IP range. The addition is done in 'Integer' so it cannot wrap.
    hSeek h AbsoluteSeek (fromIntegral regionOffset + 4)
    -- 'BL.hGet' reads the bytes eagerly, so closing the handle afterwards
    -- is safe.
    fmap BL.unpack (BL.hGet h howMany)
    -- >>= return . map (\i -> fromIntegral i :: Int8)
-- | Read the 7-byte record at absolute position @offset@ of handle @h@.
--
-- The record is split into a 4-byte part and a 3-byte part. Each part is
-- returned with its bytes reversed (via 'readBytes'), and the 3-byte part is
-- padded with a leading zero byte so that it is four bytes long.
readIt :: Handle -> Integer -> IO ([Word8], [Word8])
readIt h offset =
  hSeek h AbsoluteSeek offset >>
  BL.hGet h 7 >>= \record ->
    let (ipBytes, regionBytes) = BL.splitAt 4 record
        ipPart = readBytes 4 ipBytes
        regionPart = 0 : readBytes 3 regionBytes
    in return (ipPart, regionPart)
-- | Print the decoded file header, then the index record that the header's
-- second field points at.
--
-- The handle is managed with 'bracket' so it is closed even when reading or
-- decoding throws (for example on a truncated file).
main :: IO ()
main =
  bracket dataFileHandle hClose $ \h -> do
    --fileSize <- hFileSize h
    header <- BL.hGet h 8
    -- Only the last-record position is used below; the binding is named
    -- 'lastIndex' to avoid shadowing Prelude's 'last'.
    let addr@(_, lastIndex) = readHeader header
    print addr
    ip <- readIt h lastIndex
    print ip
-- | Placeholder for GBK to UTF-8 transcoding; not implemented yet.
--
-- The input is ignored and an empty byte string is always returned.
gbkToUtf8 :: BL.ByteString -> BL.ByteString
gbkToUtf8 _ = BL.empty
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment