Skip to content

Instantly share code, notes, and snippets.

@itkovian
Created May 31, 2017 21:48
Show Gist options
  • Save itkovian/a7b8efa5a64b1c1ebdb8ac27f8099b51 to your computer and use it in GitHub Desktop.
Save itkovian/a7b8efa5a64b1c1ebdb8ac27f8099b51 to your computer and use it in GitHub Desktop.
Retrieve inode usage for VO filesets from Elasticsearch and print out the relevant usage information
#!/usr/bin/env stack
{- stack
--resolver nightly
--install-ghc
runghc
--package bloodhound
--package optparse-applicative
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
--------------------------------------------------------------------------
import Control.Monad (liftM2)
import Data.Aeson
import Data.List (groupBy, nub, sortBy)
import Data.Maybe
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Clock (UTCTime (..), addUTCTime,
getCurrentTime,
secondsToDiffTime)
import Database.V5.Bloodhound
import qualified Database.V5.Bloodhound as BH
import qualified Database.V5.Bloodhound.Types as BHT
import GHC.Generics
import Network.HTTP.Client (defaultManagerSettings,
responseBody)
import qualified Options.Applicative as OA
import Text.Printf (printf)
--------------------------------------------------------------------------
data Options = Options
{ oFilesystem :: !String -- ^ The filesystem for which inode allocation is requested
, oInodeThreshold :: !(Maybe Integer) -- ^ The threshold under which filesets will not be mentioned (e.g., default values set in the filesystem when a fileset is created)
, oSort :: !Bool
} deriving (Show)
--------------------------------------------------------------------------
parserOptions :: OA.Parser Options
parserOptions = Options
<$> (OA.strOption $
OA.long "filesystem" <>
OA.metavar "FILESYSTEMNAME" <>
OA.help "GPFS name of the filesystem")
<*> (OA.optional $ OA.option OA.auto $
OA.long "threshold" <>
OA.value 0 <>
OA.help "Threshold for maximum inodes under which filesets are not considered")
<*> (OA.switch $
OA.long "sort" <>
OA.short 's' <>
OA.help "Sort the results according to free")
--------------------------------------------------------------------------
parserInfo :: OA.ParserInfo Options
parserInfo = OA.info (OA.helper <*> parserOptions)
(OA.fullDesc
<> OA.progDesc "Get fileset info from Elasticsearch"
<> OA.header ("gpfs-get-inode-usage")
)
--------------------------------------------------------------------------
-- | 'Inode' contains the information we consider useful in this application, i.e., inode allocation/usage
data Inode = Inode
{ fileset_name :: Text
, max_inodes :: Integer
, alloc_inodes :: Integer
, free_inodes :: Integer
, used_inodes :: Integer -- ^ computed as difference between allocated and free
} deriving (Show, Eq, Generic)
instance FromJSON Inode where
parseJSON = withObject "inode" $ \o -> do
Inode <$> o .: "fileset_name"
<*> o .: "max_inodes"
<*> o .: "alloc_inodes"
<*> o .: "free_inodes"
<*> liftM2 (-) (o .: "alloc_inodes") (o .: "free_inodes")
--------------------------------------------------------------------------
data SourceEntry = SourceEntry
{ mmlsfileset :: Inode -- ^ container for the actual information we want to get (we've got nested data structures in ES)
} deriving (Show, Eq, Generic)
instance FromJSON SourceEntry where
parseJSON = withObject "mmlsfileset" $ \o -> do
SourceEntry <$> o .: "mmlsfileset"
--------------------------------------------------------------------------
main :: IO ()
main = do
options <- OA.execParser parserInfo
now <- getCurrentTime
let timeLimit = RangeDateGte (GreaterThanEqD $ addUTCTime (negate 3600) now)
-- the actual query parameters: limit the query to the filesystem and only look up filesets for the VOs
let match = [ BHT.TermQuery (BHT.Term "mmlsfileset.filesystem_name" (T.pack $ oFilesystem options)) Nothing
, BHT.QueryPrefixQuery $ BHT.PrefixQuery (BHT.FieldName "mmlsfileset.fileset_name") "gvo" Nothing
, QueryRangeQuery $ mkRangeQuery (FieldName "@timestamp") timeLimit
]
-- create a boolean query
let q = BHT.QueryBoolQuery $ BHT.BoolQuery match [] [] [] Nothing Nothing Nothing
-- create the search and override default size of 10 results
let search = BH.mkSearch (Just q) Nothing
let search' = search { BHT.size = BHT.Size 1000 }
-- run the search
response <- runBH_ $ BH.searchByIndex testIndex search'
-- if all went well, we have results and end up Right. The type is added to force Aeson to decode to the correct type
let (Right de_scratch) = eitherDecode . responseBody $ response :: Either String (BHT.SearchResult SourceEntry)
let inode_info = map mmlsfileset . catMaybes. map hitSource . hits . searchHits $ de_scratch
let res = filter (\r -> max_inodes r > (fromJust $ oInodeThreshold options)) -- ditch everything under the threshold
$ map head -- retain the first one (least free inodes)
$ map nub -- remove stuff that's equal
$ map (sortBy (\i1 i2 -> compare (alloc_inodes i2) (alloc_inodes i1))) -- sort the groups in descending allocated inodes (allocation is monotonically rising in time)
$ groupBy (\i1 i2 -> (==) (fileset_name i1) (fileset_name i2)) -- should we have multiple results for each fileset, we group them together
$ sortBy (\i1 i2 -> compare (fileset_name i1) (fileset_name i2)) -- results are retrieved unsorted for performance reasons
$ inode_info
putStrLn . unlines . map (\i -> printf "%s %d %d %d %d" (fileset_name i) (free_inodes i) (used_inodes i) (alloc_inodes i) (max_inodes i))
$ if oSort options then sortBy (\i1 i2 -> compare (used_inodes i1) (used_inodes i2)) res else res
where
testServer = (BHT.Server "http://localhost:9200")
runBH_ = BH.withBH defaultManagerSettings testServer
testIndex = BHT.IndexName "longterm-2017"
indexSettings = BHT.IndexSettings (BHT.ShardCount 1) (BHT.ReplicaCount 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment