Created
January 18, 2011 10:11
-
-
Save songpp/784254 to your computer and use it in GitHub Desktop.
Parse IP data with Haskell -- incomplete
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module IP where | |
import Control.Exception (bracket)
import Control.Monad (replicateM)
import Data.Binary.Get
import Data.Bits
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import Data.Char
import Data.Int
import Data.Word
import Numeric
import System.IO
--import Text.Regex
-- Alternative install location, kept for reference:
--dataFile = "e:/Program Files/cz88.net/ip/qqwry.dat"

-- | Path of the qqwry.dat IP database file.
dataFile :: FilePath
dataFile = "F:/Program Files/cz88.net/ip/qqwry.dat"

-- | Index position of the first IP record.
firstIpOffset :: Integer
firstIpOffset = 5712056

-- | Index position of the last IP record.
lastIpOffset :: Integer
lastIpOffset = 8576351

-- | Number of IP records: the distance between the two index positions
-- divided by the 7-byte size of one index entry.  Evaluates to 409185,
-- the value that was previously hard-coded here.
--
-- NOTE(review): if 'lastIpOffset' is itself the start of a record, the
-- inclusive count would be one higher -- confirm before relying on it.
totalRecord :: Integer
totalRecord = (lastIpOffset - firstIpOffset) `div` 7
-- | Decode the 8-byte file header.
--
-- The first four bytes give the index position of the first IP record and
-- the next four bytes give the index position of the last IP record; both
-- are read as little-endian 32-bit words and widened to 'Integer'.
-- 'runGet' fails at runtime if the input is shorter than 8 bytes.
readHeader :: BL.ByteString -> (Integer,Integer)
readHeader header = runGet offsets header
  where
    offsets = do
      firstIdx <- getWord32le
      lastIdx <- getWord32le
      return (fromIntegral firstIdx, fromIntegral lastIdx)
-- | Open 'dataFile' for reading and return the handle.
--
-- The database is raw binary data, so the handle is opened with
-- 'openBinaryFile': a text-mode handle may apply newline translation and
-- end-of-file character handling on Windows, which would corrupt byte
-- offsets.  The caller is responsible for closing the handle with 'hClose'.
dataFileHandle :: IO Handle
dataFileHandle = openBinaryFile dataFile ReadMode
-- | Return the first byte of the given lazy ByteString.
-- 'runGet' fails at runtime when the input is empty.
readOneByte :: BL.ByteString -> Word8
readOneByte bytes = runGet getWord8 bytes
-- | Read the first @n@ bytes of the input and return them in reversed
-- order, i.e. a little-endian byte sequence comes back most-significant
-- byte first.  'runGet' fails at runtime if fewer than @n@ bytes exist.
readBytes :: Int -> BL.ByteString -> [Word8]
readBytes n input = reverse (runGet (replicateM n getWord8) input)
-- | Split a 'Word32' into its four bytes, most-significant byte first
-- (bits 31-24, then 23-16, then 15-8, then 7-0).
intToBytes :: Word32 -> [Word8]
intToBytes w = [ fromIntegral (w `shiftR` bits) | bits <- [24, 16, 8, 0] ]
-- | Read @howMany@ raw bytes of the region data referenced by the index
-- entry at @offset@.  (Example offset used for manual testing: 5712023.)
--
-- The index entry is decoded with 'readIt'; its 4-byte region offset is
-- interpreted as a big-endian 'Word32' giving the position of the record.
-- The data file is opened for the duration of the call and is always
-- closed again, even when reading fails.
readRegion :: Integer -> Int -> IO [Word8]
readRegion offset howMany =
  bracket dataFileHandle hClose $ \h -> do
    (_, regionOffsetBytes) <- readIt h offset
    let recordPos = runGet getWord32be (BL.pack regionOffsetBytes)
    -- +4 skips the four bytes holding the end address of this IP range.
    -- The addition is done after widening so it cannot wrap around.
    hSeek h AbsoluteSeek (fromIntegral recordPos + 4)
    bytes <- BL.hGet h howMany
    let result = BL.unpack bytes
    -- Force the whole list so nothing depends on the handle once it closes.
    length result `seq` return result
    -- A disabled variant mapped every byte to 'Int8':
    --   map (\i -> fromIntegral i :: Int8)
-- | Read the 7-byte index entry located at @offset@ in the handle.
--
-- The first four bytes are the IP part and the remaining three bytes are
-- the region offset.  Both parts are returned byte-reversed (see
-- 'readBytes'), and the region offset is padded with a leading zero byte
-- so that it is four bytes long.  The handle position is moved by the
-- seek and the read.
readIt :: Handle -> Integer -> IO ([Word8], [Word8])
readIt h offset =
  hSeek h AbsoluteSeek offset >>
  BL.hGet h 7 >>= \entry ->
    let (ipBytes, regionBytes) = BL.splitAt 4 entry
    in return (readBytes 4 ipBytes, 0 : readBytes 3 regionBytes)
-- | Open the data file, print the two header offsets, then print the
-- index entry stored at the last offset, and close the file.
main :: IO ()
main = do
  handle <- dataFileHandle
  -- fileSize <- hFileSize handle   (disabled in the original)
  offsets <- fmap readHeader (BL.hGet handle 8)
  print offsets
  -- The second component of the header is the last record's index position.
  readIt handle (snd offsets) >>= print
  hClose handle
-- | Placeholder for a GBK-to-UTF-8 conversion.
-- Not implemented yet: the input is ignored and an empty ByteString is
-- returned.
gbkToUtf8 :: BL.ByteString -> BL.ByteString
gbkToUtf8 _ = BL.empty
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment