Skip to content

Instantly share code, notes, and snippets.

@andrewmkhoury
Created May 5, 2020 21:18
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewmkhoury/ca7d53862c3af8214e42609454d19291 to your computer and use it in GitHub Desktop.
Save andrewmkhoury/ca7d53862c3af8214e42609454d19291 to your computer and use it in GitHub Desktop.
Apache Oak Groovy Script to find large binary properties in Oak JCR
//Adaptation of @stillalex's script from here https://gist.github.com/stillalex/06303f8cc1d3780d3eab4c72575883ae
//This version works with Oak 1.6 and later versions
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicInteger
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.spi.state.NodeState
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.apache.jackrabbit.oak.commons.PathUtils
import com.google.common.collect.Lists
import java.util.List
// Logger for the top-level script statements; script-local variables are not visible inside
// the methods below, so the NodeState overload of countNodes() obtains its own instance.
def log = org.slf4j.LoggerFactory.getLogger("countNodes.groovy")
/**
 * Recursively walks the tree rooted at {@code n}, reading every property value and counting
 * nodes and binaries along the way.
 *
 * Binaries larger than 10,000,000 bytes are logged with their path and size. Non-binary
 * properties are read only so that unreadable values raise an exception here, which is then
 * logged; read failures never abort the traversal. Children named ":index" are skipped.
 *
 * @param n        node state to start from
 * @param deep     unused by this method; kept so existing callers keep working
 * @param path     path of {@code n}, used only to build log messages and child paths
 * @param flush    a progress line is logged whenever the running count is a multiple of this
 * @param count    running node counter shared by the whole traversal
 * @param binaries running binary counter shared by the whole traversal
 * @param root     true for the outermost call; enables the start and summary log lines
 * @return the value of {@code count} once this node and all of its descendants were visited
 */
def countNodes(NodeState n, deep = false, String path = "/", Integer flush = 1000, AtomicInteger count = new AtomicInteger(0), AtomicInteger binaries = new AtomicInteger(0), root = true) {
    org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger("countNodes.groovy");

    // Binaries strictly larger than this many bytes are reported.
    final long largeBinaryBytes = 10000000L
    // Prefix for everything directly below this node; avoids "//name" when path is "/".
    final String base = path?.endsWith("/") ? path : path + "/"

    if (root) {
        log.info("Counting nodes in tree ${path}");
    }

    // Declared locally on purpose: an undeclared "cnt" would live in the script binding and
    // be shared (and overwritten) by every recursion level and every thread running this.
    def cnt = count.incrementAndGet()
    if (cnt % flush == 0) log.info(" " + cnt)

    // Counts one binary value and reports it when it exceeds the size threshold.
    def recordBinary = { prop, blob ->
        long size = blob.length()
        if (size > largeBinaryBytes) log.info("Binary ${base}@${prop.getName()}: " + size/1024/1024 + " MB");
        binaries.incrementAndGet();
    }

    try {
        for (prop in n.getProperties()) {
            try {
                if (prop.getType() == Type.BINARIES) {
                    for (b in prop.getValue(Type.BINARIES)) {
                        recordBinary(prop, b)
                    }
                } else if (prop.getType() == Type.BINARY) {
                    recordBinary(prop, prop.getValue(Type.BINARY))
                } else {
                    // Check regular properties for missing segments
                    if (prop.isArray()) {
                        for (sf in prop.getValue(prop.getType())) {
                            // do nothing - we just need to read all values
                        }
                    } else {
                        prop.getValue(prop.getType());
                    }
                }
            } catch (e) {
                // Only the message is logged; the original script kept stack traces disabled.
                log.error("warning unable to read node properties ${path} ${prop.name}: " + e.getMessage())
            }
        }
        try {
            for (child in n.getChildNodeEntries()) {
                try {
                    if (child.getName().equals(":index")) {
                        log.info("Skipping sync index at ${base}${child.getName()}");
                        continue;
                    }
                    countNodes(child.getNodeState(), deep, base + child.getName(), flush, count, binaries, false)
                } catch (e) {
                    log.error("warning unable to read child node ${base}${child.getName()} : " + e.getMessage())
                }
            }
        } catch (e) {
            log.error("warning unable to read child entries ${path} : " + e.getMessage())
        }
    } catch (e) {
        log.error("warning unable to read node ${path} : " + e.getMessage())
    }

    // Read the shared counter rather than the local snapshot so the total covers descendants.
    def total = count.get()
    if (root) {
        log.info("Total nodes in tree ${path}: ${total}");
        log.info("Total binaries in tree ${path}: ${binaries.get()}");
    }
    return total
}
/**
 * Resolves {@code path} against the root of the NodeStore behind {@code session} and counts
 * the nodes below it via the NodeState overload of countNodes().
 *
 * ".." segments drop the previously collected segment (if any) and "." segments are ignored
 * before the remaining segments are followed from the root.
 *
 * @param session session whose root node gives access to the underlying NodeStore
 * @param path    path of the subtree to scan; passed on unchanged for log output
 * @param deep    forwarded unchanged to the NodeState overload
 * @return whatever the NodeState overload returns
 */
def countNodes(session, path, deep) {
    NodeStore store = session.getRootNode().sessionDelegate.root.store
    def cursor = store.root.builder()

    // Collect the effective path segments, resolving "." and ".." on the way.
    List<String> segments = Lists.newArrayList();
    for (String segment in PathUtils.elements(path)) {
        if (PathUtils.denotesParent(segment)) {
            if (!segments.isEmpty()) {
                segments.remove(segments.size() - 1);
            }
        } else if (!PathUtils.denotesCurrent(segment)) {
            segments.add(segment);
        }
    }

    // Descend from the root builder one segment at a time.
    for (String segment in segments) {
        if (segment.size() > 0) {
            cursor = cursor.getChildNode(segment)
        }
    }

    return countNodes(cursor.getNodeState(), deep, path)
}
/**
 * Logs in administratively to the SlingRepository obtained from the {@code osgi} script
 * variable, counts the nodes below {@code path}, and always logs the session out again.
 *
 * @param path path of the subtree to scan
 * @return whatever the session-based overload returns
 */
def countNodes(path) {
    def session = osgi.getService(org.apache.sling.jcr.api.SlingRepository).loginAdministrative(null)
    try {
        return countNodes(session, path, true)
    } finally {
        session.logout()
    }
}
// Script entry point: run the scan on a dedicated, named thread and wait for it to finish.
log.info("Running node counter")
// Only the subtree under /oak:index is scanned here.
t1 = Thread.start("countNodes /oak:index") {
    countNodes("/oak:index")
}
log.info("Done starting countNodes threads")
t1.join()
log.info("Done running countNodes")
// The script's final expression, and therefore its result, is null.
null
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment