Skip to content

Instantly share code, notes, and snippets.

@mroth23
Last active August 29, 2015 14:15
Show Gist options
  • Save mroth23/6864f47ffe1a25f27ce7 to your computer and use it in GitHub Desktop.
Save mroth23/6864f47ffe1a25f27ce7 to your computer and use it in GitHub Desktop.
gitstat.hs
{-
Updated to latest version - fixing a bug I didn't notice before with merge commit or binary file lines.
This tool parses a git history into a Haskell data type.
The history can then be further analysed using the ghci command line (all important
metrics are known / saved: commit hash, author, files it modifies and size of contributions).
I also added an example that prints a graphviz representation of the commit / file graph.
-}
import Control.Monad
import System.Exit (exitFailure)
import System.IO (hPrint, stderr)

import Text.Parsec
import Text.Parsec.Combinator
import Text.Parsec.Char
import Text.Parsec.String
-- Data type definitions
-- | A change made to a single file within one commit.
data Diff = Diff
  { file    :: String  -- ^ Name of the file the change applies to.
  , added   :: Int     -- ^ Amount recorded as additions for this file.
  , deleted :: Int     -- ^ Amount recorded as deletions for this file.
  }
  deriving (Show)
-- | One commit from the log: its metadata plus the per-file changes it made.
data Commit = Commit
  { hash    :: String  -- ^ Commit hash.
  , author  :: String  -- ^ Author of the commit.
  , date    :: String  -- ^ Commit date, kept as unparsed text.
  , message :: String  -- ^ Commit message.
  , diffs   :: [Diff]  -- ^ Per-file changes belonging to this commit.
  }
  deriving (Show)
-- | Program entry point.
--
-- Reads a git log from stdin, runs it through 'runGParser' and, on success,
-- prints the commit/file graph via 'graphviz'.
--
-- When the log cannot be parsed, the parse error is written to stderr and
-- the process exits with a failure status, so that whatever consumes stdout
-- never receives an error message in place of graph data.
main :: IO ()
main = do
    gitLog <- getContents
    case parse runGParser "(unknown)" gitLog of
        Left err      -> hPrint stderr err >> exitFailure
        Right commits -> graphviz commits
-- | Does the commit have a diff whose file name is exactly @filename@?
containsFile :: String -> Commit -> Bool
containsFile filename c = any ((== filename) . file) (diffs c)
-- | Print the commit/file graph to stdout in Graphviz syntax.
--
-- Every commit becomes a node inside a cluster labelled \"Commits\",
-- identified by the first six characters of its hash.  After the cluster,
-- one undirected edge is printed from each commit node to every file name
-- found in that commit's diffs.
graphviz :: [Commit] -> IO ()
graphviz cs = do
    putStrLn "graph {"
    putStrLn "subgraph cluster_0 {"
    putStrLn "label=\"Commits\""
    mapM_ (putStrLn . nodeLine) cs
    putStrLn "}"
    mapM_ (mapM_ putStrLn . edgeLines) cs
    putStrLn "}"
  where
    -- Quoted six-character hash prefix used as the node identifier.
    nodeId :: Commit -> String
    nodeId = show . take 6 . hash

    -- Node declaration for one commit.
    nodeLine :: Commit -> String
    nodeLine c = nodeId c ++ ";"

    -- One edge per diff, from the commit node to the quoted file name.
    edgeLines :: Commit -> [String]
    edgeLines c = [nodeId c ++ " -- " ++ show (file d) | d <- diffs c]
-- The actual parser implementation.
-- | Top-level parser: zero or more commits followed by end of input.
runGParser :: GenParser Char () [Commit]
runGParser = many commit >>= \commits -> eof >> return commits
-- | Parse a single log entry.
--
-- The pieces are read in order: the hash line, an optional merge header,
-- the author and date headers, one line terminator, the message, and
-- finally the list of per-file diffs.
commit :: GenParser Char () Commit
commit = do
    entryHash   <- commitHash
    optional commitMerge
    entryAuthor <- commitAuthor
    entryDate   <- commitDate
    -- commitDate has already consumed the end of its own line, so this
    -- terminator belongs to the empty line that follows the headers.
    _           <- endOfLine
    entryMsg    <- commitMessage
    entryDiffs  <- commitDiffs
    return (Commit entryHash entryAuthor entryDate entryMsg entryDiffs)
-- | Skip a @Merge:@ header line.
--
-- Leading whitespace, the @Merge:@ tag and the rest of that line are
-- consumed; the line terminator itself is left in the input.  The parser is
-- wrapped in 'try', so when the header is absent it fails without consuming
-- anything and can safely be used with 'optional'.
commitMerge :: GenParser Char () ()
commitMerge = try $ do
    spaces
    _ <- string "Merge:"
    -- The remainder of the line is not needed, so skip it instead of
    -- collecting it into a list.
    skipMany (noneOf "\n")
-- | Parse the @commit@ keyword followed by exactly 40 hexadecimal digits,
-- skipping any whitespace in front of each, and return the digits.
commitHash :: GenParser Char () String
commitHash = spaces >> string "commit" >> spaces >> count 40 hexDigit
-- | Parse the @Author:@ header and return the rest of its line.
--
-- Whitespace before and after the tag is skipped; the line terminator is
-- consumed but not included in the result.
commitAuthor :: GenParser Char () String
commitAuthor = do
    spaces >> string "Author:" >> spaces
    name <- many (noneOf "\n")
    _ <- endOfLine
    return name
-- | Parse the @Date:@ header and return the rest of its line as plain text.
--
-- Whitespace before and after the tag is skipped; the line terminator is
-- consumed but not included in the result.
commitDate :: GenParser Char () String
commitDate = dateTag >> restOfLine
  where
    -- The header tag together with the whitespace around it.
    dateTag    = spaces >> string "Date:" >> spaces
    -- Everything up to the newline, then the terminator itself.
    restOfLine = many (noneOf "\n") >>= \stamp -> endOfLine >> return stamp
-- | Parse the commit message: every line up to the first empty line, which
-- is consumed as well.
--
-- The characters of each line are kept verbatim (terminators dropped) and
-- the lines are joined without any separator.
commitMessage :: GenParser Char () String
commitMessage = fmap concat (manyTill messageLine blankLine)
  where
    -- One line of text, without its terminator.
    messageLine = manyTill anyChar (try endOfLine)
    -- A terminator where a new line would start, i.e. an empty line.
    blankLine   = try endOfLine
-- | Parse the per-file stat lines of a commit, up to and including the
-- summary line that closes them.
--
-- Binary stat lines are tried before textual ones so that the more specific
-- @Bin@ form is never claimed by the general textual parser.
--
-- A commit without a recognisable stat block yields an empty list and
-- consumes no input, so the next commit can be parsed from where this one
-- left off instead of being absorbed into this commit's diffs.
commitDiffs :: GenParser Char () [Diff]
commitDiffs = option [] (try statBlock)
  where
    -- Stat lines terminated by the summary line.
    statBlock = manyTill (commitDiffB <|> commitDiff) (try commitSummary)
-- | Parse the summary line that ends a stat block: a number followed by
-- @file changed@ or @files changed@ and the rest of the line.
--
-- The line may be terminated by a newline or by the end of input, so a log
-- whose final line lacks a trailing newline is still accepted.  The whole
-- parser is wrapped in 'try' and therefore consumes nothing on failure.
-- Returns the terminating character (@'\n'@ is reported at end of input).
commitSummary :: GenParser Char () Char
commitSummary = try $ do
    spaces
    _ <- many1 digit
    spaces
    -- "files changed" shares the prefix "file" with the singular form, so
    -- the first alternative must be able to backtrack.
    _ <- try (string "files changed") <|> string "file changed"
    _ <- many (noneOf "\n")
    endOfLine <|> (eof >> return '\n')
-- | Parse one textual stat line of the form @name | count ++--@.
--
-- Matching is confined to a single line: the file name may not contain a
-- newline and only horizontal whitespace is skipped between the fields, so
-- a line without a @|@ makes the parser fail instead of reading ahead into
-- later lines.  A change count is required, which keeps this parser from
-- accepting lines that have something other than a number after the @|@.
--
-- The file name is the first space-delimited word before the @|@.
-- 'added' and 'deleted' are the numbers of @+@ and @-@ characters on the
-- line.
--
-- NOTE(review): the numeric count is parsed but discarded; if the @+@/@-@
-- run is a scaled histogram rather than an exact tally, these figures are
-- only approximate -- confirm against the log format being fed in.
commitDiff :: GenParser Char () Diff
commitDiff = try $ do
    rawName <- manyTill (noneOf "\n") (char '|')
    blanks
    _ <- many1 digit
    blanks
    changes <- many (oneOf "+-")
    optional endOfLine
    return $ Diff { file    = takeWhile (/= ' ') (dropWhile (== ' ') rawName)
                  , added   = length (filter (== '+') changes)
                  , deleted = length (filter (== '-') changes) }
  where
    -- Horizontal whitespace only, so a match never runs onto the next line.
    blanks = skipMany (oneOf " \t")
-- | Parse one binary stat line of the form @name | Bin 0 -> 123 bytes@.
--
-- Matching is confined to a single line: the file name may not contain a
-- newline and only horizontal whitespace is skipped between the fields, so
-- a line without a @|@ makes the parser fail instead of reading ahead into
-- later lines.
--
-- The file name is the first space-delimited word before the @|@.  The
-- byte sizes are not recorded; every binary change is reported as one
-- addition and no deletions.
commitDiffB :: GenParser Char () Diff
commitDiffB = try $ do
    rawName <- manyTill (noneOf "\n") (char '|')
    blanks
    _ <- string "Bin"
    blanks
    _ <- many digit
    _ <- string " -> "
    _ <- many digit
    _ <- string " bytes"
    optional endOfLine
    return $ Diff { file    = takeWhile (/= ' ') (dropWhile (== ' ') rawName)
                  , added   = 1
                  , deleted = 0 }
  where
    -- Horizontal whitespace only, so a match never runs onto the next line.
    blanks = skipMany (oneOf " \t")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment