Skip to content

Instantly share code, notes, and snippets.

@yuga
Last active December 15, 2015 15:09
Show Gist options
  • Save yuga/5279313 to your computer and use it in GitHub Desktop.
Save yuga/5279313 to your computer and use it in GitHub Desktop.

軽い気持ちで tables を使ってみました

準備

{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE LiberalTypeSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}

import qualified Data.ByteString        as S
import qualified Data.ByteString.Char8  as CH
import qualified Data.ByteString.Lazy   as L
import qualified Data.Csv               as C
import qualified Data.Csv.Streaming     as CS
import qualified Data.Vector            as V

import Control.Applicative  ( (<$>), (<*>) )
import Control.Lens         ( defaultRules, lensField, makeLensesWith, (&), (.~), (^.), (??) )
import Control.Monad        ( mzero, unless, void )
import Data.Char            ( ord )
import Data.Conduit         ( GInfConduit, Sink, awaitE, runResourceT, yield, ($=), (=$=), ($$) )
import Data.Conduit.Binary  ( lines, sourceFile )
import Data.Conduit.Util    ( SinkStateResult(StateProcessing), sinkState )
import Data.Csv             ( DecodeOptions(..), defaultDecodeOptions, (.!) )
import Data.Data            ( Data, Typeable )
import Data.DateTime        ( DateTime )
import Data.Table           ( Tabular(..), Candidate, Primary, Supplemental, Tab, Table
                            , autoIncrement, empty, insert, table )

import Prelude       hiding ( lines )
import Debug.Trace

==========

レコード定義

手元にパケットのログデータをつめたTSVファイルがあったので、それを使用します。

2012-10-18 08:56:43.337266000000   -40   -92   66   3   00:26:B6:AA:AA:AA   Askey Computer   tp

ログの形式ほぼそのままに、先頭にIDだけを追加したデータ型を作ります。

data Packet = Packet
    { pId        :: Int
    , pDateTime  :: DateTime
    , pDbmSignal :: Int
    , pDbmNoise  :: Int
    , pDbSignal  :: Int
    , pDbNoise   :: Int
    , pMacAddr   :: String
    , pVendor    :: String
    , pSource    :: String
    }
    deriving (Eq,Data,Ord,Read,Show,Typeable)

CSVから読み込むためのインスタンス定義

cassava (Data.Csv) を使います。

instance C.FromRecord Packet where
    parseRecord v
        | V.length v == 8 = Packet 0
                                <$> v .! 0 
                                <*> v .! 1 
                                <*> v .! 2 
                                <*> v .! 3 
                                <*> v .! 4 
                                <*> v .! 5 
                                <*> v .! 6 
                                <*> v .! 7
        | otherwise       = mzero

instance C.FromField DateTime where
    parseField s = return (read $ CH.unpack s)

Lensを作成

makeLensesWith ?? ''Packet $ defaultRules & lensField .~ \x -> Just (x ++ "_")

Tableにするためのインスタンス定義

class Ord (PKT t) => Tabular t where

instance Tabular Packet where
    -- The primary key type
    type PKT Packet = Int

    -- The type used internally for colmuns
    data Key k Packet b where
        PID        :: Key Primary      Packet Int
        PDateTime  :: Key Supplemental Packet DateTime
        PDbmSignal :: Key Supplemental Packet Int
        PMacAddr   :: Key Supplemental Packet String
        PVendor    :: Key Supplemental Packet String
        PSource    :: Key Supplemental Packet String

    -- Used to store indices
    data Tab Packet i = PacketTab (i Primary      Int)
                                  (i Supplemental DateTime)
                                  (i Supplemental Int)
                                  (i Supplemental String)
                                  (i Supplemental String)
                                  (i Supplemental String)

    -- Extract the value of a Key
    -- fetch :: Key k t a -> t -> a
    fetch PID        = pId
    fetch PDateTime  = pDateTime
    fetch PDbmSignal = pDbmSignal
    fetch PMacAddr   = pMacAddr
    fetch PVendor    = pVendor
    fetch PSource    = pSource

    -- Every Table has one Primary Key
    -- primary :: Key Primary t (PKT t)
    primary = PID

    -- ... and so if you find one, it had better be that one!
    -- primarily :: Key Primary t a -> (a ~ PKT t => r) -> r
    primarily PID r = r

    -- Construct a Tab given a function from key to index
    -- mkTab :: Applicative h => (forall k a. IsKeyType k a => Key k t a -> h (i k a)) -> h (Tab t i)
    mkTab f = PacketTab <$> f PID
                        <*> f PDateTime
                        <*> f PDbmSignal
                        <*> f PMacAddr
                        <*> f PVendor
                        <*> f PSource

    -- Lookup an index in a Tab
    -- ixTab :: Tab t i -> Key k t a -> i k a
    ixTab (PacketTab x _ _ _ _ _) PID        = x
    ixTab (PacketTab _ x _ _ _ _) PDateTime  = x
    ixTab (PacketTab _ _ x _ _ _) PDbmSignal = x
    ixTab (PacketTab _ _ _ x _ _) PMacAddr   = x
    ixTab (PacketTab _ _ _ _ x _) PVendor    = x
    ixTab (PacketTab _ _ _ _ _ x) PSource    = x

    forTab (PacketTab p a b c d e) f = PacketTab <$> f PID        p 
                                                 <*> f PDateTime  a
                                                 <*> f PDbmSignal b
                                                 <*> f PMacAddr   c
                                                 <*> f PVendor    d
                                                 <*> f PSource    e

    autoTab = autoIncrement pId_

Tableに流し込むためのSink

sinkPacketIntoTable :: (Monad m) => Sink Packet m (Table Packet)
sinkPacketIntoTable = sinkState empty push finish
  where
    push   table pac = return (StateProcessing $ insert pac table)
    finish table     = return table

CSVデコード用のConduit

decodeCsv :: (Monad m)
          => C.DecodeOptions
          -> Bool
          -> GInfConduit S.ByteString m Packet
decodeCsv decOpt header = loop header id
  where
    loop skip front = awaitE >>= either (finish skip front) (go skip front)

    finish skip front r = do
        unless (L.null final) (void $ push $ CS.decodeWith decOpt skip final)
        return r
      where
        final = front L.empty

    go skip sofar more = do
        left <- push $ CS.decodeWith decOpt skip (sofar $ L.fromChunks [more])
        loop False (L.append left)

    push (CS.Cons (Right r) rs) = yield r >> push rs
    push (CS.Cons (Left  m) rs) = push rs
    push (CS.Nil Nothing  left) = return left
    push (CS.Nil (Just m) left) = return left

tsvDecodeOption :: DecodeOptions
tsvDecodeOption = defaultDecodeOptions { decDelimiter = fromIntegral (ord '\t') }

Test

  1. ghci
  2. :m + Control.Lens Data.Foldable Data.Table
  3. :set prompt "ghci> "
  4. :l Wifi

Test Data #1

testList :: [Packet]
testList =
    [ Packet 0 ((read "2012-10-17 23:56:42.8315900 UTC")::DateTime) (-42) (-91) 63 4 "00:26:B6:AA:AA:AA" "Askey Computer" "tp"
    , Packet 0 ((read "2012-10-17 23:56:42.8322130 UTC")::DateTime) (-41) (-91) 65 4 "00:26:B6:AA:AA:AA" "Askey Computer" "tp"
    , Packet 0 ((read "2012-10-17 23:56:52.6748680 UTC")::DateTime) (-92) (-92)  3 3 "44:6D:57:BB:BB:BB" "Liteon Technology Corporation" "tp"
    , Packet 0 ((read "2012-10-17 23:56:52.7029370 UTC")::DateTime) (-92) (-92)  3 3 "44:6D:57:BB:BB:BB" "Liteon Technology Corporation" "tp"
    ]

test1 :: Table Packet
test1 = testList ^. table
  1. try some queries
  • test1
  • test1 ^. with PDbmSignal (>=) (-90)
  • test1 ^. with PDbmSignal (<) (-90)
  • test1 & with PDbmSignal (<) (-90) .~ empty
  • test1 ^. with PDbmSignal (>=) (-90) ^@.. group PMacAddr
  • for_ test1 $ putStrLn . show
  • test1 ^. folded.act $ putStrLn.show
  • toList $ test1

Test Data #2

test2 :: FilePath -> IO (Table Packet)
test2 f = runResourceT $ sourceFile f $= lines =$= decodeCsv opt False $$ sinkPacketIntoTable
  where
    opt = tsvDecodeOption
  1. pt <- test2 "wifi_300.tsv"
  2. try some queries
  • count test2
  • pt ^?! ix 300
  • take 2 $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
  • length $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
  • length $ filter ((2 <=) . (count.snd)) $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
  • map fst $ filter ((2 <=) . (count.snd)) $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
  • let cnt a t = if n > 1 then a + n else a where { n = (count.snd) t }
  • Data.Foldable.foldl' cnt 0 $ pt ^. with PDbmSignal (>=) (-90) ^. with PSource (==) "tp" ^@.. group PMacAddr

疑問

  • projection?
  • いろんなこと
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment