Last active
August 29, 2015 14:15
-
-
Save mroth23/6864f47ffe1a25f27ce7 to your computer and use it in GitHub Desktop.
gitstat.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- | |
Updated to the latest version, which fixes a previously unnoticed bug in the handling of merge-commit and binary-file lines.
This tool parses a git history into a Haskell data type. | |
The history can then be analysed further from the ghci prompt (all the important
metrics are recorded: commit hash, author, the files a commit modifies, and the size of each contribution).
I also added an example that prints a graphviz representation of the commit / file graph. | |
-} | |
import Control.Monad
import System.Exit (exitFailure)
import System.IO (hPrint, stderr)
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
import Text.Parsec.String
-- Data type definitions | |
-- | One file's entry within a commit: which file was touched and how many
-- additions and deletions were recorded for it.
data Diff = Diff
  { file    :: String -- ^ Name of the changed file.
  , added   :: Int    -- ^ Number of additions recorded for the file.
  , deleted :: Int    -- ^ Number of deletions recorded for the file.
  }
  deriving Show
-- | A commit: its metadata together with one 'Diff' per file it changes.
data Commit = Commit
  { hash    :: String -- ^ Commit hash.
  , author  :: String -- ^ Author, kept as text.
  , date    :: String -- ^ Commit date, kept as unparsed text.
  , message :: String -- ^ Commit message.
  , diffs   :: [Diff] -- ^ Per-file changes made by the commit.
  }
  deriving Show
-- | Program entry point.
--
-- Reads the whole git log from standard input and runs it through
-- 'runGParser'. On success the commit/file graph is written to stdout by
-- 'graphviz'. On a parse error the error is written to stderr and the process
-- exits with a failure status, so the diagnostic can never be mistaken for
-- graph output by whatever consumes stdout.
main :: IO ()
main = do
  gitLog <- getContents
  case parse runGParser "(unknown)" gitLog of
    Left err -> do
      hPrint stderr err
      exitFailure
    Right commits -> graphviz commits
-- | Does the commit touch the given file? True exactly when one of the
-- commit's diffs carries this file name.
containsFile :: String -> Commit -> Bool
containsFile filename c = any ((== filename) . file) (diffs c)
-- | Print the commit/file graph to stdout in Graphviz syntax.
--
-- All commits are first listed as nodes inside a cluster labelled "Commits";
-- afterwards one undirected edge is printed for every (commit, file) pair.
graphviz :: [Commit] -> IO ()
graphviz cs = do
  mapM_ putStrLn ["graph {", "subgraph cluster_0 {", "label=\"Commits\""]
  mapM_ (putStrLn . (++ ";") . nodeId) cs
  putStrLn "}"
  mapM_ (mapM_ putStrLn . edgesOf) cs
  putStrLn "}"
  where
    -- A commit node is named by the quoted first six characters of its hash.
    nodeId :: Commit -> String
    nodeId = show . take 6 . hash

    -- One "commit -- file" edge line per file the commit changes.
    edgesOf :: Commit -> [String]
    edgesOf c = [nodeId c ++ " -- " ++ show name | name <- map file (diffs c)]
-- The actual parser implementation. | |
-- | Top-level parser: zero or more commits, then end of input. Anything left
-- over after the last commit is a parse error.
runGParser :: GenParser Char () [Commit]
runGParser = many commit <* eof
-- | Parse a single commit entry: the hash line, an optional @Merge:@ line,
-- the author and date lines, the line break separating the header from the
-- message, the message itself, and finally the per-file change lines.
--
-- The arguments are supplied in the order of the 'Commit' fields
-- (hash, author, date, message, diffs).
commit :: GenParser Char () Commit
commit =
  Commit
    <$> (commitHash <* optional commitMerge)
    <*> commitAuthor
    <*> (commitDate <* endOfLine)
    <*> commitMessage
    <*> commitDiffs
-- | Skip a @Merge:@ header line.
--
-- Leading whitespace, the @Merge:@ keyword and the remainder of that line are
-- consumed; the line terminator itself is left in the input. The parser is
-- wrapped in 'try', so when the next line is not a @Merge:@ line it fails
-- without consuming anything and can safely be used under 'optional'.
commitMerge :: GenParser Char () ()
commitMerge = try $ do
  spaces
  _ <- string "Merge:"
  skipMany (noneOf "\n")
-- | Parse the @commit <hash>@ line and return the hash, which must consist of
-- exactly 40 hexadecimal digits. Whitespace before the keyword and between
-- the keyword and the hash is skipped.
commitHash :: GenParser Char () String
commitHash = spaces *> string "commit" *> spaces *> count 40 hexDigit
-- | Parse the @Author:@ line and return everything after the keyword (minus
-- the whitespace directly following it) up to the end of the line. The line
-- terminator is consumed.
commitAuthor :: GenParser Char () String
commitAuthor = keyword *> restOfLine
  where
    keyword = spaces >> string "Author:" >> spaces
    restOfLine = many (noneOf "\n") <* endOfLine
-- | Parse the @Date:@ line and return the date as unparsed text: everything
-- after the keyword and the whitespace following it, up to the end of the
-- line. The line terminator is consumed.
commitDate :: GenParser Char () String
commitDate = keyword *> restOfLine
  where
    keyword = spaces >> string "Date:" >> spaces
    restOfLine = many (noneOf "\n") <* endOfLine
-- | Parse the commit message: every line up to (and including) the first
-- empty line.
--
-- The lines are joined without any separator and keep whatever leading
-- indentation they had in the input.
commitMessage :: GenParser Char () String
commitMessage = fmap concat (manyTill messageLine blankLine)
  where
    -- One line of text; its terminator is consumed but not returned.
    messageLine = manyTill anyChar (try endOfLine)
    -- A line terminator where a new line would start, i.e. an empty line.
    blankLine = try endOfLine
-- | Parse the per-file change lines of a commit, up to and including the
-- "N file(s) changed" summary line, and return one Diff per file line.
--
-- The binary-file parser is tried before the text-file parser. It insists on
-- the literal @Bin@ marker after the @|@, so it can never steal a text line,
-- whereas trying the text parser first risks it accepting just the @file |@
-- prefix of a binary line and leaving @Bin ... bytes@ behind to be misread as
-- the next file name.
commitDiffs =
  manyTill (commitDiffB <|> commitDiff) (try commitSummary)
-- | Parse the summary line that closes a commit's file list, e.g.
-- @ 3 files changed, 10 insertions(+), 2 deletions(-)@.
--
-- Only the leading count and the words @file changed@ / @files changed@ are
-- checked; the rest of the line is skipped. The line terminator is consumed
-- and returned. The whole parser is wrapped in 'try', so on any other kind of
-- line it fails without consuming input.
commitSummary :: GenParser Char () Char
commitSummary = try $ do
  spaces
  _ <- many1 digit
  spaces
  _ <- try (string "files changed") <|> try (string "file changed")
  _ <- many (noneOf "\n")
  endOfLine
-- | Parse one text-file change line such as @ src/Main.hs | 12 ++++++--@.
--
-- The file name is the first space-delimited word before the @|@. The
-- 'added' and 'deleted' fields are the numbers of @+@ and @-@ characters in
-- the histogram that follows the change count.
-- NOTE(review): git scales that histogram for large changes, so these values
-- may be smaller than the real line counts - confirm whether that matters.
--
-- At least one digit is required after the @|@. Without that requirement the
-- parser would also succeed on lines that carry no change count at all - in
-- particular on the @file |@ prefix of a @Bin 0 -> 42 bytes@ line - and leave
-- the rest of such a line behind to be misread as the next file name.
commitDiff = try $ do
  rawName <- manyTill anyChar (try (char '|'))
  spaces
  _ <- many1 digit
  spaces
  histogram <- many (oneOf "+-")
  optional endOfLine
  return Diff
    { file = takeWhile (/= ' ') (dropWhile (== ' ') rawName)
    , added = length (filter (== '+') histogram)
    , deleted = length (filter (== '-') histogram)
    }
-- | Parse one binary-file change line such as
-- @ logo.png | Bin 0 -> 1234 bytes@.
--
-- The file name is the first space-delimited word before the @|@. The byte
-- sizes are skipped; a binary change is always recorded as one addition and
-- no deletions. Wrapped in 'try', so a line without the @Bin@ marker makes
-- the parser fail without consuming input.
commitDiffB = try $ do
  rawName <- manyTill anyChar (try (char '|'))
  spaces *> string "Bin" *> spaces
  many digit *> string " -> " *> many digit *> string " bytes"
  optional endOfLine
  return Diff
    { file = (takeWhile (/= ' ') . dropWhile (== ' ')) rawName
    , added = 1
    , deleted = 0
    }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment