Skip to content

Instantly share code, notes, and snippets.

@chetanmeh
Created June 23, 2016 09:41
Show Gist options
  • Save chetanmeh/ccaa10e6ee8c9b0775484e6a9f6ba06e to your computer and use it in GitHub Desktop.
Save chetanmeh/ccaa10e6ee8c9b0775484e6a9f6ba06e to your computer and use it in GitHub Desktop.
Script to analyze the content of property indexes, reporting the number of entries, number of indexes, etc. on a fine-grained basis.
import com.google.common.base.Function
import com.google.common.base.Stopwatch
import com.google.common.collect.FluentIterable
import com.google.common.collect.TreeTraverser
import groovy.json.JsonOutput
import groovy.text.SimpleTemplateEngine
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
import org.apache.jackrabbit.oak.spi.state.NodeState
import static com.google.common.collect.Iterables.transform
/**
 * Collects size statistics for the indexes found under the {@code oak:index}
 * child of the root node state whose {@code type} is {@code property} or
 * {@code disabled}, then writes them out as a plain-text report and as JSON.
 */
class IndexStatsHelper {
    /** Store to analyze; only {@code getRoot()} is invoked on it. */
    def ns
    /** Per-index stats maps appended by {@link #dumpStats()}; not reset between calls. */
    def stats = []
    /** Maximum number of per-value rows kept (and listed) for each index. */
    def maxIndexedEntryCount = 10
    /** Destination of the JSON dump. */
    def statsFileJson = new File("index-stats.json")
    /** Destination of the plain-text report. */
    def statsFileTxt = new File("index-stats.txt")

    /**
     * Walks every matching index definition, gathers its stats into
     * {@link #stats}, prints progress to stdout and writes both report files.
     */
    def dumpStats() {
        Stopwatch w = Stopwatch.createStarted()
        ns.getRoot().getChildNode("oak:index").childNodeEntries.each { ChildNodeEntry cne ->
            def idxName = cne.name
            def idxState = cne.nodeState
            def type = idxState.getString("type")
            if (type == 'property' || type == 'disabled') {
                print0("Processing $idxName")
                boolean unique = idxState.getBoolean('unique')
                // Key insertion order is preserved in the printed map and the JSON output.
                def idxStats = [childCount: 0, entryCount: 0]
                idxStats.name = idxName
                idxStats.disabled = type == 'disabled'
                idxStats.unique = unique
                def contentNode = idxState.getChildNode(':index')
                def indexedStats = []
                if (unique) {
                    // The pre-order traversal yields its start node first, so subtract
                    // one to count only the nodes below ':index'. This keeps the figure
                    // consistent with the non-unique branch and reports 0 (not 1) for an
                    // empty or missing ':index' node.
                    // NOTE(review): treats every node below ':index' of a unique index
                    // as one entry, as the original did - confirm against the index layout.
                    idxStats.childCount = getTraversor(contentNode).size() - 1
                    idxStats.entryCount = idxStats.childCount
                } else {
                    // One stats row per direct child of ':index'; totals are summed up.
                    contentNode.childNodeEntries.each { ChildNodeEntry indexedStateEntry ->
                        def indexedEntryName = indexedStateEntry.name
                        def indexEntryStats = getIndexEntryStats(indexedStateEntry.nodeState)
                        idxStats.entryCount += indexEntryStats.entryCount
                        idxStats.childCount += indexEntryStats.childCount
                        indexedStats << [
                                name      : indexedEntryName,
                                childCount: indexEntryStats.childCount,
                                entryCount: indexEntryStats.entryCount
                        ]
                    }
                }
                // Largest rows first, then keep only the top maxIndexedEntryCount.
                indexedStats.sort { -it.childCount }
                idxStats.indexedEntryCount = indexedStats.size()
                // Copy instead of keeping a subList view: a view would pin the complete
                // (possibly very large) row list in memory for as long as stats lives.
                indexedStats = new ArrayList(indexedStats.subList(0, Math.min(indexedStats.size(), maxIndexedEntryCount)))
                // Printed before indexedStats is attached, so the line stays short.
                print0(" $idxStats")
                idxStats.indexedStats = indexedStats
                stats << idxStats
            }
        }
        stats.sort { -it.childCount }
        statsFileTxt.withPrintWriter { pw ->
            def header = "Overall index stats"
            def output = dumpStats(header, stats)
            print0(output)
            pw.println(output)
            stats.each { s ->
                // Per-value breakdown only exists for non-unique indexes with content.
                if (s.childCount > 0 && !s.unique) {
                    header = "Stats for [${s.name}]."
                    if (s.indexedEntryCount > maxIndexedEntryCount) {
                        header += " Listing top ${s.indexedStats.size()} out of total ${s.indexedEntryCount}"
                    }
                    output = dumpStats(header, s.indexedStats)
                    print0(output)
                    pw.println(output)
                }
            }
        }
        statsFileJson.text = JsonOutput.prettyPrint(JsonOutput.toJson(stats))
        print0("Stats in json format dumped to ${statsFileJson.getAbsolutePath()}")
        print0("Stats in txt format dumped to ${statsFileTxt.getAbsolutePath()}")
        print0("Total time taken : $w")
    }

    /** Prints {@code msg} to standard output. */
    def print0(def msg) {
        println(msg)
    }

    /**
     * Traverses {@code idxNodeState} and all of its descendants.
     *
     * @param idxNodeState node state to start from
     * @return map with {@code childCount} (every node visited, the start node
     *         included) and {@code entryCount} (visited nodes that have a
     *         {@code match} property)
     */
    def getIndexEntryStats(NodeState idxNodeState) {
        FluentIterable<NodeState> itr = getTraversor(idxNodeState)
        def stats = [entryCount: 0, childCount: 0]
        itr.each { NodeState visited ->
            if (visited.hasProperty('match')) {
                stats.entryCount++
            }
            stats.childCount++
        }
        return stats
    }

    /**
     * Returns a pre-order traversal over {@code ns} and its descendants,
     * following {@code childNodeEntries} at each level.
     *
     * @param ns node state the traversal starts at (it is part of the result)
     */
    FluentIterable<NodeState> getTraversor(NodeState ns) {
        def traversor = new TreeTraverser<NodeState>() {
            Iterable<NodeState> children(NodeState root) {
                return transform(root.childNodeEntries, { ChildNodeEntry cne ->
                    cne.nodeState
                } as Function)
            }
        }
        return traversor.preOrderTraversal(ns)
    }

    /**
     * Renders {@code stats} as a fixed-width text table.
     *
     * @param header title line printed above the table
     * @param stats  rows exposing {@code entryCount}, {@code childCount} and
     *               {@code name}; sorted in place by descending {@code childCount}
     * @return the rendered table as a string
     */
    def dumpStats(def header, def stats) {
        StringWriter sw = new StringWriter()
        PrintWriter pw = new PrintWriter(sw)
        pw.println(header)
        pw.println()
        stats.sort { -it.childCount }
        def columns = [
                [name: "entryCount", displayName: "Entry Count", size: 10],
                [name: "childCount", displayName: "Child Count", size: 10],
                [name: "name", displayName: "Name", size: 45],
        ]
        def ttf = new TemplateFactory()
        ttf.columns = columns
        pw.println(new SimpleTemplateEngine().createTemplate(ttf.template).make([rows: stats]).toString())
        return sw.toString()
    }

    /**
     * Builds the SimpleTemplateEngine source for a table: a line of centered
     * column titles, a line of underscores, then one line per element of the
     * {@code rows} binding with each value padded and truncated to the column size.
     */
    class TemplateFactory {
        /** Column descriptors, each with {@code name}, {@code displayName} and {@code size}. */
        def columns = []

        // The string's lines are kept at column 0 on purpose: any leading
        // whitespace inside the literal would show up in the rendered table.
        def getTemplate() { """
${columns.collect{ " <%print \"$it.displayName\".center($it.size)%> " }.join()}
${columns.collect{ " <%print \"_\"*$it.size %> " }.join()}
<% rows.each {%>${columns.collect{ " \${it.${it.name}.toString().padRight($it.size).substring(0,$it.size)} " }.join()}
<% } %>"""
        }
    }
}
// Script entry point: build a helper around session.store and run the full dump.
// NOTE(review): `session` is not defined in this script; presumably it is bound
// by the hosting shell before the script is loaded - confirm.
new IndexStatsHelper(ns:session.store).dumpStats()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment