@chetanmeh
Last active March 12, 2022 15:41
Groovy script to get various stats related to Oak repository

This script has now moved to https://github.com/chetanmeh/oak-console-scripts. Refer to that repository for the updated version.

To make use of this script:

1. Download the latest oak-run

$ wget -O oak-run-1.6.0.jar 'https://search.maven.org/remotecontent?filepath=org/apache/jackrabbit/oak-run/1.6.0/oak-run-1.6.0.jar'

Or download it from here

2. Execute the script via the oak-run console

Run the oak-run console and load this script:

For newer versions of AEM

$ java -Xmx4g -jar oak-run*.jar console /path/to/segmentstore ":load https://gist.githubusercontent.com/chetanmeh/d7588d96a839dd2d26760913e4055215/raw/oakRepoStats.groovy"

If you are collecting stats from an old segmentstore format, pass --segment=true as an argument to the console command.

For AEM 6.0-6.2

$ java -Xmx4g -jar oak-run*.jar console --segment=true /path/to/segmentstore ":load https://gist.githubusercontent.com/chetanmeh/d7588d96a839dd2d26760913e4055215/raw/oakRepoStats.groovy"

This script can be used safely against any Oak version. It connects to the repository in read-only mode and does not require access to the DataStore.
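The script also honours two optional JVM system properties (read near the top of the script): excludeIndexStats, which skips index stats collection, and rootPath, which restricts the traversal to a subtree. A minimal sketch of passing them, assuming the same invocation as above (/content is just an example subtree):

$ java -Xmx4g -DexcludeIndexStats=true -DrootPath=/content -jar oak-run*.jar console /path/to/segmentstore ":load https://gist.githubusercontent.com/chetanmeh/d7588d96a839dd2d26760913e4055215/raw/oakRepoStats.groovy"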

The script reads the repository and generates two files containing various stats about the repository content (a Groovy sketch for reading the JSON output follows the list):

  • repo-stats.json - stats in JSON format
  • repo-stats.txt - stats in text form
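For programmatic consumption, repo-stats.json can be read with any JSON library. A minimal Groovy sketch, assuming the file is in the current working directory and using the top-level keys emitted by writeStatsInJson (totalCount, jcrNodeCount, indexNodeCount, nodeTypeStats, ...):

import groovy.json.JsonSlurper

// Parse the JSON stats file produced by the script
def stats = new JsonSlurper().parse(new File('repo-stats.json'))

// A few of the top-level counters written by writeStatsInJson
println "Script version : ${stats.scriptVersion}"
println "Total nodes    : ${stats.totalCount}"
println "JCR nodes      : ${stats.jcrNodeCount}"
println "Index nodes    : ${stats.indexNodeCount}"

// nodeTypeStats is a list of objects with 'name', 'count', 'subTreeCount', ...
stats.nodeTypeStats.take(5).each { nt ->
    println "${nt.name} -> ${nt.count}"
}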

Running via ssh

Set the system property -Djline.terminal=jline.UnsupportedTerminal when running the command via nohup:

$ nohup java -Xmx4g -Djline.terminal=jline.UnsupportedTerminal -jar oak-run*.jar console --segment=true /path/to/segmentstore ":load https://gist.githubusercontent.com/chetanmeh/d7588d96a839dd2d26760913e4055215/raw/oakRepoStats.groovy" &
import com.codahale.metrics.ConsoleReporter
import com.codahale.metrics.MetricRegistry
import com.google.common.base.Predicate
import com.google.common.base.Stopwatch
import com.google.common.collect.AbstractIterator
import com.google.common.collect.FluentIterable
import com.google.common.collect.Iterators
import com.google.common.collect.PeekingIterator
import com.google.common.collect.Sets
import com.google.common.collect.TreeTraverser
import groovy.json.JsonOutput
import groovy.text.SimpleTemplateEngine
import groovy.transform.*
import org.apache.commons.io.output.WriterOutputStream
import org.apache.jackrabbit.JcrConstants
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.api.PropertyState
import org.apache.jackrabbit.oak.api.Tree
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.commons.PathUtils
import org.apache.jackrabbit.oak.commons.sort.StringSort
import org.apache.jackrabbit.oak.plugins.blob.datastore.InMemoryDataRecord
import org.apache.jackrabbit.oak.plugins.tree.TreeFactory
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.apache.jackrabbit.oak.util.TreeUtil
import javax.jcr.PropertyType
import java.util.concurrent.TimeUnit
import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount
@CompileStatic
class RepoStats {
final boolean excludeIndexStats = Boolean.getBoolean("excludeIndexStats")
final String rootPath = System.getProperty("rootPath", "/")
final static String VERSION = "1.2"
MetricRegistry metrics = new MetricRegistry()
NodeStore nodeStore;
List<String> excludedIndexes =[]
BinaryStats binaryStats = new BinaryStats()
long indexNodeCount;
long systemNodeCount;
long versionStoreCount;
long permStoreCount;
long jcrNodeCount;
long totalCount
boolean statsWritten
Map<String, NodeTypeStats> nodeTypeStats = [:].withDefault {key -> new NodeTypeStats(name:(String)key)}
Map<String, MimeTypeStats> mimeTypeStats = [:].withDefault { key -> new MimeTypeStats(name:(String)key)}
Map<String, PathStats> pathStats = [:].withDefault { key -> new PathStats(path:(String)key)}
List<IndexStats> propertyIndexStats = []
List<LuceneIndexStats> luceneIndexStats = []
final int maxIndexedEntryCount = 10
def detailedStatsEnabled = true
private Set<String> detailedStats = [
'dam:AssetContent',
'cq:PageContent',
'rep:User',
'rep:Group',
'nt:file',
'nt:versionHistory'
] as HashSet
private List<String> impPaths = [
'/content/dam',
'/var/audit'
]
private File statsFileJson = new File("repo-stats.json")
private File statsFileTxt = new File("repo-stats.txt")
Stopwatch watch = Stopwatch.createStarted()
boolean useChildNodeCount
private final Predicate<Tree> countingPredicate = new Predicate<Tree>(){
boolean apply(Tree input) {
if (input && input.exists()) {
tick(input)
collectTypeStats(input)
collectBinaryStats(input)
}
return true
}
}
def dumpStats(){
init()
Tree root = TreeFactory.createReadOnlyTree(nodeStore.root)
root = TreeUtil.getTree(root, rootPath)
getFilteringTraversor(root).each { Tree t ->
def path = t.path
if (path.endsWith('oak:index')){
if (!excludeIndexStats) {
long count = collectIndexStats(t)
if (path == '/oak:index') {
pathStats[path].count += count
}
}
} else if (path.startsWith('/jcr:system')){
systemNodeCount++
if (path.startsWith('/jcr:system/jcr:versionStorage')){
versionStoreCount++
} else if (path.startsWith('/jcr:system/rep:permissionStore')){
permStoreCount++
}
} else {
if (t.hasProperty(JCR_PRIMARYTYPE)) {
jcrNodeCount++
}
}
collectPathStats(t.path)
}
writeStats()
statsWritten = true
}
def init() {
useChildNodeCount = nodeStore.root.class.name.endsWith("SegmentNodeState")
if (useChildNodeCount){
println("Using useChildNodeCount for estimating immediate children count")
}
if (excludeIndexStats){
println "Index stats collection excluded via flag 'excludeIndexStats'"
}
println "Using root path as ${rootPath}"
addShutdownHook {
if (!statsWritten){
writeStats()
}
}
}
//~----------------------------------------< PathStats >
@EqualsAndHashCode
@Sortable
static class PathStats{
long count
String path
String jcrPercent
String overallPercent
String getPathStr(){
if (!path.startsWith('/')){
return '/' + path
}
return path
}
String getCountStr(){
return displayCount(count)
}
void computePercent(long jcrCount, long overAllCount){
jcrPercent = String.format("%1.2f %%", (double)count/jcrCount * 100)
overallPercent = String.format("%1.2f %%", (double)count/overAllCount * 100)
if (path.contains('oak:index') || path.contains('jcr:system')){
jcrPercent = "-"
}
}
}
def collectPathStats(String path) {
if (PathUtils.denotesRoot(path)){
return
}
String firstPart = PathUtils.elements(path).iterator().next()
pathStats[firstPart].count++
impPaths.each {p ->
if (path.startsWith(p)){
pathStats[p].count++
}
}
}
def processPathStats() {
pathStats.values().each {PathStats ps -> ps.computePercent(jcrNodeCount, totalCount)}
}
//~----------------------------------------< Node Statistics >
static class NodeTypeStats{
String name
long count
long subTreeCount
long referredBinarySize
int percentJcrContent
}
def collectTypeStats(Tree t) {
PropertyState primaryType = t.getProperty(JCR_PRIMARYTYPE)
if (primaryType) {
String type = primaryType.getValue(Type.NAME)
nodeTypeStats[type].count++
if (detailedStatsEnabled && detailedStats.contains(type)) {
collectSubtreeStats(nodeTypeStats[type], t)
}
}
PropertyState mixinType = t.getProperty(JCR_MIXINTYPES)
if (mixinType){
mixinType.getValue(Type.NAMES).each {type ->
nodeTypeStats[type].count++
}
}
}
def collectSubtreeStats(NodeTypeStats stats, Tree tree) {
//Get a histogram of number of child nodes
FluentIterable<Tree> itr = getTreeTraversor(tree)
int rootDepth = PathUtils.getDepth(tree.path)
itr = itr.filter(excludeTreeOfSameType(stats, tree))
int depth = 0
itr = itr.filter({Tree t -> depth = Math.max(depth, PathUtils.getDepth(t.path) - rootDepth); return true} as Predicate)
int subTreeCount
switch (stats.name) {
case 'dam:AssetContent' :
subTreeCount = collectAssetStats(stats, tree, itr);
break
default:
subTreeCount = itr.size()
}
stats.subTreeCount += subTreeCount
metrics.histogram(metricName(stats.name, "subtree_depth")).update(depth)
metrics.histogram(metricName(stats.name, "subtree_count")).update(subTreeCount)
}
int collectAssetStats(NodeTypeStats stats, Tree tree, FluentIterable<Tree> itr) {
long originalSize = 0
long renditionSize = 0
long xmpCount = 0
itr = itr.filter({Tree t ->
if (t.name == 'jcr:content') {
MimeTypeStats binStats = getBinaryStats(t)
if (binStats) {
if (t.getParent().name == 'original') {
originalSize += binStats.totalSize
} else {
renditionSize += binStats.totalSize
metrics.histogram(metricName(stats.name, "renditions")).update(binStats.totalSize)
}
}
}
if (t.name == 'metadata'){
xmpCount = getTreeTraversor(t).size()
}
return true
} as Predicate<Tree>)
//Read the iterator
int subtreeCount = itr.size()
metrics.histogram(metricName(stats.name, "original")).update(originalSize)
metrics.histogram(metricName(stats.name, "renditions_total")).update(renditionSize)
metrics.histogram(metricName(stats.name, "xmp")).update(xmpCount)
stats.referredBinarySize += originalSize + renditionSize
return subtreeCount
}
private Predicate<Tree> excludeTreeOfSameType(NodeTypeStats stats, Tree tree) {
new Predicate<Tree>() {
boolean apply(Tree input) {
PropertyState primaryType = input.getProperty(JCR_PRIMARYTYPE)
//Exclude any subtree which is based on the current type, e.g. a dam:AssetContent
//having dam:AssetContent as one of its sub nodes
if (primaryType
&& primaryType.getValue(Type.NAME) == stats.name
&& input.path != tree.path) {
return false
}
return true
}
}
}
static String metricName(String nodeType, String type){
return "${type}_${nodeType}"
}
private List<NodeTypeStats> processNodeTypeStats() {
List<NodeTypeStats> nodeTypeStatsList = new ArrayList(nodeTypeStats.values())
nodeTypeStatsList.sort { -it.count }
nodeTypeStatsList.each {NodeTypeStats s ->
s.percentJcrContent = (int) ((double) s.subTreeCount/jcrNodeCount * 100)
}
return nodeTypeStatsList
}
def tick(Tree t) {
if (++totalCount % 10000 == 0){
println("Traversed $totalCount (${withSuffix(totalCount)})...")
}
if (t.path.contains('/oak:index')){
indexNodeCount++
}
}
//~-----------------------------------------------------< Binary Statistics >
static final String MIMETYPE_UNKNOWN = 'unknown'
@ToString(excludes = ['blobIds'], includeNames = true)
@EqualsAndHashCode
@Sortable(excludes = ['blobIds'])
static class MimeTypeStats{
long externalSize
long inlinedSize
int count
String name
Set<String> blobIds = Collections.emptySet()
String getSizeStr(){
//Used in template writeBinaryStats
return "${withSizeSuffix(totalSize)} (E:${withSizeSuffix(externalSize)}, I:${withSizeSuffix(inlinedSize)})"
}
long getTotalSize(){
return externalSize + inlinedSize
}
MimeTypeStats add(MimeTypeStats stats){
if (stats) {
count += stats.count
externalSize += stats.externalSize
inlinedSize += stats.inlinedSize
}
return this
}
}
static class BinaryStats {
TypedBinaryStats files = new TypedBinaryStats()
TypedBinaryStats versionedFiles = new TypedBinaryStats()
TypedBinaryStats index = new TypedBinaryStats()
TypedBinaryStats unknown = new TypedBinaryStats()
TypedBinaryStats all = new TypedBinaryStats()
void add(Tree t, long size, int count, Set<String> blobIds, long inlineSize){
if (PathUtils.denotesRoot(t.path)){
return
}
TypedBinaryStats typedStats = unknown
if (t.parent.getProperty(JCR_PRIMARYTYPE)?.getValue(Type.STRING) == 'nt:file'){
typedStats = files
} else if (t.path.contains('/oak:index')){
typedStats = index
} else if (t.path.startsWith('/jcr:system')
&& t.parent.getProperty('jcr:frozenPrimaryType')?.getValue(Type.STRING) == 'nt:file'){
typedStats = versionedFiles
}
typedStats.add(size, inlineSize, count, blobIds)
all.add(size, inlineSize, count, blobIds)
}
void computeSize(){
List<TypedBinaryStats> allStats = [files, versionedFiles, index, unknown, all]
allStats.each {it.computeSize(); }
versionedFiles.diff(files)
unknown.diff(files)
allStats.each {it.close()}
}
}
static class TypedBinaryStats{
long inlineSize
long externalSize
long actualSize = -1
int count
StringSort blobIds = new StringSort(1000, String.CASE_INSENSITIVE_ORDER)
String getSizeStr(){
//Used in template writeBinaryStats
return humanReadableByteCount(externalSize)
}
String getActualSizeStr(){
//Used in template writeBinaryStats
return humanReadableByteCount(actualSize)
}
String getBlobIds() {
return "NA"
}
String toString(){
"${withSizeSuffix(totalSize)} ($count) (Deduplicated ${withSizeSuffix(actualSize)}, Inlined ${withSuffix(inlineSize)})"
}
long getTotalSize(){
return inlineSize + externalSize
}
void add(long size, long inlineSize, int count, Set<String> blobIds){
this.externalSize += size
this.count += count
this.inlineSize += inlineSize
def that = this
blobIds.each {id -> that.addId(id)}
}
private void addId(String id){
if (!isInlined(id)) {
this.@blobIds.add(id)
}
}
void computeSize(){
if (actualSize < 0){
actualSize = computeSizeFromSort(blobIds())
}
}
void close(){
blobIds().close()
}
StringSort blobIds(){
this.@blobIds
}
void diff(TypedBinaryStats other) {
//Computes the diff 'this - other', i.e. the size of blobs which are present
//in this but not present in other
StringSort diffIds = new StringSort(1000, String.CASE_INSENSITIVE_ORDER)
Iterator<String> diffItr = new DiffIterator(blobIds(), other.blobIds())
while(diffItr.hasNext()){
diffIds.add(diffItr.next())
}
actualSize = computeSizeFromSort(diffIds)
diffIds.close()
}
private long computeSizeFromSort(StringSort blobIds) {
long totalSize = 0
blobIds.sort()
blobIds.each { id ->
totalSize += RepoStats.getBlobSizeFromId(id)
}
return totalSize
}
}
def collectBinaryStats(Tree tree) {
MimeTypeStats stats = getBinaryStats(tree)
if (stats) {
mimeTypeStats[stats.name].add(stats)
binaryStats.add(tree, stats.externalSize, stats.count, stats.blobIds, stats.inlinedSize)
metrics.histogram("inlined_binary").update(stats.inlinedSize)
}
}
MimeTypeStats getBinaryStats(Tree tree){
long size = 0
long inlinedSize = 0
Set<String> blobIds = Sets.newHashSet()
def blobProcessor = {Blob b->
long blobSize = getBlobSize(b)
if (b.getContentIdentity() != null){
blobIds.add(b.getContentIdentity())
}
if (isInlined(tree, b)){
inlinedSize += blobSize
} else {
size += blobSize
}
}
getProps(tree).each {PropertyState ps ->
if (ps.type.tag() != PropertyType.BINARY){
return
}
if (ps.isArray()){
for (int i = 0; i < ps.count(); i++) {
Blob b = ps.getValue(Type.BINARY, i)
blobProcessor(b)
}
} else {
Blob b = ps.getValue(Type.BINARY)
blobProcessor(b)
}
}
if (size || inlinedSize){
String mimeType = tree.getProperty(JcrConstants.JCR_MIMETYPE)?.getValue(Type.STRING)
if (!mimeType){
mimeType = MIMETYPE_UNKNOWN
}
return new MimeTypeStats(name: mimeType, count: 1, externalSize: size,
blobIds: blobIds, inlinedSize: inlinedSize)
}
return null
}
def computeBlobStats() {
binaryStats.computeSize()
}
static boolean isInlined(Tree t, Blob blob){
String blobId = blob.contentIdentity
return isInlined(blobId)
}
static boolean isInlined(String blobId) {
if (!blobId) {
return true
}
if (InMemoryDataRecord.isInstance(blobId)) {
return true
}
//OAK-4789 - SegmentBlob returns recordId. Check if it contains '.'
//A recordId indicates it's an inlined blob
if (blobId.contains(".")) {
return true
}
return false
}
static long getBlobSize(Blob blob){
long size = 0
try{
//Check first directly
size = blob.length()
} catch (Exception e){
//Maybe the blobstore is not configured. Extract the length encoded in the id
String blobId = blob.getContentIdentity()
size = getBlobSizeFromId(blobId)
}
return size
}
static long getBlobSizeFromId(String blobId) {
if (blobId) {
int indexOfHash = blobId.lastIndexOf('#')
if (indexOfHash > 0) {
return blobId.substring(indexOfHash + 1) as long
}
}
return 0
}
@CompileStatic(TypeCheckingMode.SKIP)
static Iterable<PropertyState> getProps(Tree t){
//Workaround Groovy compiler issue
return t.properties
}
//~-----------------------------------------------------< Index Statistics >
@ToString(excludes = ['indexedStats'], includeNames = true)
static class IndexStats {
String name
boolean disabled
boolean unique
long childCount
long entryCount
long indexedEntryCount
Set<IndexEntryStats> indexedStats = new HashSet<>()
}
@EqualsAndHashCode
@Sortable
static class IndexEntryStats {
long entryCount
long childCount
String name
}
@EqualsAndHashCode
@Sortable
static class LuceneIndexStats {
long size
int count
String name
String getSizeStr(){
//Used in template writeBinaryStats
return humanReadableByteCount(size)
}
}
long collectIndexStats(Tree oakIndex){
long indexNodeCountAtStart = indexNodeCount
oakIndex.children.each {Tree indexNode ->
def idxName = indexNode.path
if (excludedIndexes.contains(idxName)){
println("Excluding index $idxName")
return
}
def type = indexNode.getProperty("type")?.getValue(Type.STRING)
if (type == 'property' || type == 'disabled'){
println("Processing $idxName")
countingPredicate.apply(indexNode)
boolean unique = indexNode.getProperty('unique')?.getValue(Type.BOOLEAN)
def idxStats = new IndexStats(name:idxName, unique: unique, disabled: type == 'disabled')
def contentNode = indexNode.getChild(':index')
countingPredicate.apply(contentNode)
if (unique) {
collectUniqueIndexStats(contentNode, idxStats)
} else {
collectPropertyIndexStats(contentNode, idxStats)
}
println(" $idxStats")
propertyIndexStats << idxStats
} else if (type == 'lucene'){
collectLuceneIndexStats(idxName, indexNode)
} else {
indexNodeCount += getCountingTraversor(indexNode).size()
}
}
return indexNodeCount - indexNodeCountAtStart
}
private void collectLuceneIndexStats(String idxName, Tree indexNode) {
LuceneIndexStats idxStats = new LuceneIndexStats(name: idxName)
indexNodeCount += getCountingTraversor(indexNode)
.filter({ Tree idxTree ->
MimeTypeStats mimeStats = getBinaryStats(idxTree)
if (mimeStats) {
idxStats.size += mimeStats.externalSize
idxStats.count += mimeStats.count
}
return true
} as Predicate)
.size()
luceneIndexStats << idxStats
}
private void collectPropertyIndexStats(Tree contentNode, IndexStats idxStats) {
TreeSet<IndexEntryStats> indexedStats = new TreeSet<>(Collections.reverseOrder())
contentNode.children.each { Tree indexedStateEntry ->
IndexEntryStats indexEntryStats = getIndexEntryStats(indexedStateEntry)
idxStats.entryCount += indexEntryStats.entryCount
idxStats.childCount += indexEntryStats.childCount
idxStats.indexedEntryCount++
indexedStats << indexEntryStats
if (indexedStats.size() > maxIndexedEntryCount){
indexedStats.pollLast()
}
}
idxStats.indexedStats = indexedStats
}
private void collectUniqueIndexStats(Tree contentNode, IndexStats idxStats) {
if (useChildNodeCount) {
//use faster route for big hierarchies
long count = contentNode.getChildrenCount(Integer.MAX_VALUE);
idxStats.childCount = count
totalCount += count
indexNodeCount += count
println("Obtained count [$count] via getChildrenCount")
} else {
idxStats.childCount = getCountingTraversor(contentNode).size()
}
idxStats.entryCount = idxStats.childCount
}
private IndexEntryStats getIndexEntryStats(Tree indexEntry){
FluentIterable<Tree> itr = getCountingTraversor(indexEntry)
def stats = new IndexEntryStats(name: indexEntry.name)
itr.each {Tree ns ->
if (ns.hasProperty('match')){
stats.entryCount++
}
stats.childCount++
}
return stats
}
//~--------------------------------------------< Stats Reporting >
private void writeStats() {
processPathStats()
List<NodeTypeStats> nodeTypeStatsList = processNodeTypeStats()
luceneIndexStats = luceneIndexStats.sort().reverse()
propertyIndexStats.sort{-it.childCount}
computeBlobStats()
writeStatsInText(nodeTypeStatsList)
writeStatsInJson(nodeTypeStatsList)
println("Stats in json format dumped to ${statsFileJson.getAbsolutePath()}")
println("Stats in text format dumped to ${statsFileTxt.getAbsolutePath()}")
println("Total time taken : $watch")
}
private void writeStatsInText(nodeTypeStats) {
statsFileTxt.withPrintWriter { pw ->
pw.println("Number of NodeStates")
pw.println("\tTotal : ${displayCount(totalCount)}")
pw.println("\tin index : ${displayCount(indexNodeCount)}")
pw.println("\tin system : ${displayCount(systemNodeCount)}")
pw.println("\tin JCR : ${displayCount(jcrNodeCount)}")
pw.println()
pw.println("Number of NodeStates under /jcr:system")
pw.println("\tTotal : ${displayCount(systemNodeCount)}")
pw.println("\tin /jcr:system/jcr:versionStorage : ${displayCount(versionStoreCount)}")
pw.println("\tin /jcr:system/rep:permissionStore : ${displayCount(permStoreCount)}")
pw.println()
pw.println("Binary stats")
pw.println("The size reported below does not account for deduplication. Hence reported")
pw.println("total size might be much more than actual datastore size")
pw.println("\tTotal : ${binaryStats.all}")
pw.println("\t\t nt:file : ${binaryStats.files}")
pw.println("\t\t oak:index : ${binaryStats.index}")
pw.println("\t\t versioned nt:file : ${binaryStats.versionedFiles}")
pw.println("\t\t unknown : ${binaryStats.unknown}")
pw.printf("Report generated on :%tc%n", new Date())
pw.println()
writeNodeTypeStats(pw, nodeTypeStats)
writePathStats(pw)
writeBinaryStats(pw)
writeMetrics(pw)
writeIndexStats(pw)
pw.println("Total time taken : $watch")
}
}
private static String displayCount(long count){
return "$count (${withSuffix(count)})"
}
private void writeStatsInJson(List<NodeTypeStats> nodeTypeStats) {
def jsonData = [
scriptVersion : VERSION,
totalCount : totalCount,
indexNodeCount : indexNodeCount,
systemNodeCount : systemNodeCount,
jcrNodeCount : jcrNodeCount,
versionStoreCount : versionStoreCount,
permStoreCount : permStoreCount,
date : String.format("%tc", new Date()),
timestamp : System.currentTimeMillis(),
timetaken : watch.elapsed(TimeUnit.SECONDS),
timetakenStr : watch.toString(),
nodeTypeStats : nodeTypeStats,
pathStats : pathStats,
propertyIndexStats: propertyIndexStats,
luceneIndexStats : luceneIndexStats,
binaryStats : binaryStats,
metrics : getMetricsAsMap()
]
statsFileJson.text = JsonOutput.prettyPrint(JsonOutput.toJson(jsonData))
}
private def getMetricsAsMap() {
metrics.histograms.collectEntries {name, hist ->
def histMap = [
count : hist.count,
median: hist.snapshot.median,
min : hist.snapshot.min,
max : hist.snapshot.max,
mean : hist.snapshot.mean,
stdDev: hist.snapshot.stdDev,
p75 : hist.snapshot.'75thPercentile',
p95 : hist.snapshot.'95thPercentile',
p98 : hist.snapshot.'98thPercentile',
p99 : hist.snapshot.'99thPercentile',
p999 : hist.snapshot.'999thPercentile',
]
return [name, histMap]
}
}
private void writeBinaryStats(PrintWriter pw) {
def columns = [
[name: "name", displayName: "Name", size: 45],
[name: "count", displayName: "Count", size: 10],
[name: "totalSize", displayName: "Size", size: 15],
[name: "sizeStr", displayName: "Size", size: 45],
]
List<MimeTypeStats> stats = new ArrayList<>(mimeTypeStats.values())
stats.sort()
stats = stats.reverse()
pw.println(dumpStats("Binary stats", stats, columns))
}
def writePathStats(PrintWriter pw) {
def columns = [
[name: "pathStr", displayName: "Path", size: 25],
[name: "countStr", displayName: "Count", size: 25],
[name: "jcrPercent", displayName: "% JCR", size: 10],
[name: "overallPercent", displayName: "% All", size: 10],
]
List<PathStats> stats = new ArrayList<>(pathStats.values())
stats = stats.sort().reverse()
pw.println(dumpStats("Path stats (Total ${displayCount(totalCount)})", stats, columns))
}
private void writeNodeTypeStats(PrintWriter pw, def nodeTypeStats) {
def columns = [
[name: "name", displayName: "Name", size: 45],
[name: "count", displayName: "Count", size: 10],
[name: "subTreeCount", displayName: "Subtree Count", size: 15],
[name: "percentJcrContent", displayName: "% JCR", size: 10],
]
pw.println(dumpStats("Nodetype stats", nodeTypeStats, columns))
}
private void writeMetrics(PrintWriter pw) {
if (!metrics.getMetrics().isEmpty()) {
pw.println("Histogram of count of child nodes of mentioned types")
PrintStream ps = new PrintStream(new WriterOutputStream(pw))
ConsoleReporter.forRegistry(metrics).outputTo(ps).build().report()
ps.flush()
}
}
private void writeIndexStats(PrintWriter pw) {
def header = "Overall index stats"
def output = writeIndexStats(header, propertyIndexStats)
pw.println(output)
writeLuceneIndexStats(pw);
propertyIndexStats.each { s ->
if (s.childCount > 0 && !s.unique) {
header = "Stats for [${s.name}]."
if (s.indexedEntryCount > maxIndexedEntryCount) {
header += " Listing top ${s.indexedStats.size()} out of total ${s.indexedEntryCount}"
}
output = writeIndexStats(header, s.indexedStats)
pw.println(output)
}
}
}
def writeLuceneIndexStats(PrintWriter pw) {
def columns = [
[name:"name",displayName:"Name",size:45],
[name:"size",displayName:"Size",size:10],
[name:"sizeStr",displayName:"Size",size:10],
]
pw.println(dumpStats("Lucene Index Stats", luceneIndexStats, columns))
}
private def writeIndexStats(String header, def stats) {
def columns = [
[name:"entryCount",displayName:"Entry Count",size:10],
[name:"childCount",displayName:"Child Count",size:10],
[name:"name",displayName:"Name",size:45],
]
dumpStats(header, stats, columns)
}
//~---------------------------------------------< Traversor implementations >
FluentIterable<Tree> getFilteringTraversor(Tree tree ){
FluentIterable<Tree> itr = getTreeTraversor(tree)
//Only include nodes up to /oak:index
return getCountingTraversor(itr.filter(new Predicate<Tree>() {
boolean apply(Tree t) {
//Some property indexes seem to index oak:index as a value
int oakIndexCount = t.path.count('oak:index')
if (oakIndexCount > 0){
if (t.path.endsWith('oak:index') && oakIndexCount == 1){
return true
} else {
return false
}
} else {
return true
}
}
}))
}
FluentIterable<Tree> getCountingTraversor(Tree tree) {
return getTreeTraversor(tree).filter(countingPredicate)
}
FluentIterable<Tree> getCountingTraversor(FluentIterable<Tree> itr) {
return itr.filter(countingPredicate)
}
static FluentIterable<Tree> getTreeTraversor(Tree t){
def traversor = new TreeTraverser<Tree>(){
Iterable<Tree> children(Tree root) {
return root.children
}
}
return traversor.preOrderTraversal(t)
}
//~--------------------------------------------< Text Table Template >
def dumpStats(String header, def stats, def columns){
StringWriter sw = new StringWriter()
PrintWriter pw = new PrintWriter(sw)
pw.println(header)
pw.println()
def ttf = new TemplateFactory()
ttf.columns = columns
pw.println(new SimpleTemplateEngine().createTemplate(ttf.template).make([rows:stats]).toString())
return sw.toString()
}
class TemplateFactory {
def columns = []
@SuppressWarnings("GroovyAssignabilityCheck")
@CompileStatic(TypeCheckingMode.SKIP)
String getTemplate() { """
${columns.collect{ " <%print \"$it.displayName\".center($it.size)%> " }.join()}
${columns.collect{ " <%print \"_\"*$it.size %> " }.join()}
<% rows.each {%>${columns.collect{ " \${it.${it.name}.toString().padRight($it.size).substring(0,$it.size)} " }.join()}
<% } %>"""
}
}
//~---------------------------------------< utils >
static class DiffIterator extends AbstractIterator<String> {
final PeekingIterator<String> peekB;
final Iterator<String> b
final Iterator<String> a
DiffIterator(Iterable<String> a, Iterable<String> b){
this(a.iterator(), b.iterator())
}
DiffIterator(Iterator<String> a, Iterator<String> b){
this.b = b
this.peekB = Iterators.peekingIterator(b)
this.a = a
}
@Override
protected String computeNext() {
String diff = computeNextDiff();
if (diff == null) {
return endOfData();
}
return diff;
}
private String computeNextDiff() {
if (!a.hasNext()) {
return null;
}
//Marked iterator is exhausted; the rest of 'a' are all part of the diff
if (!peekB.hasNext()) {
return a.next();
}
String diff = null;
while (a.hasNext() && diff == null) {
diff = a.next();
while (peekB.hasNext()) {
String marked = peekB.peek();
int comparisonResult = diff.compareTo(marked);
if (comparisonResult > 0) {
//Extra entries in marked. Ignore them and move on
peekB.next();
} else if (comparisonResult == 0) {
//Matching entry found in marked, move past it. Not a
//diff candidate
peekB.next();
diff = null;
break;
} else {
//This entry is not found in marked entries
//hence part of diff
return diff;
}
}
}
return diff;
}
}
static String withSuffix(long count) {
if (count < 1000) return "" + count;
int exp = (int) (Math.log(count) / Math.log(1000));
return String.format("%.1f %c",
count / Math.pow(1000, exp),
"kMGTPE".charAt(exp-1));
}
static String withSizeSuffix(long size){
humanReadableByteCount(size)
}
}
new RepoStats(nodeStore: session.store).dumpStats()
Sample output

Number of NodeStates
    Total     : 498436
    in index  : 244266
    in system : 92064
    in JCR    : 162104

Number of NodeStates under /jcr:system
    Total                              : 92064
    in /jcr:system/jcr:versionStorage  : 86684
    in /jcr:system/rep:permissionStore : 1944

Report generated on :Thu Oct 27 16:46:35 IST 2016

Nodetype stats

    Name                                            Count
    _____________________________________________   __________
    rep:versionStorage                              67405
    nt:unstructured                                 65078
    nt:file                                         25209
    nt:resource                                     25173
    nt:frozenNode                                   11557

...Stats around indexes

Overall index stats

    Entry Count   Child Count   Name
    ___________   ___________   _____________________________________________
    28267         136478        slingResourceType
    43763         43763         uuid
    3973          17566         type
    3618          6472          nodetype
    1038          3418          slingResourceSuperType