Skip to content

Instantly share code, notes, and snippets.

@andrewmkhoury
Last active April 29, 2021 08:19
Show Gist options
  • Save andrewmkhoury/3a3b5e69900f4e873743 to your computer and use it in GitHub Desktop.
Save andrewmkhoury/3a3b5e69900f4e873743 to your computer and use it in GitHub Desktop.
Oak Datastore Consistency Check
import org.apache.jackrabbit.oak.spi.commit.CommitInfo
import org.apache.jackrabbit.oak.spi.commit.EmptyHook
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.apache.jackrabbit.oak.commons.PathUtils
import com.google.common.collect.Lists
import java.util.List
import java.io.File;
/**
 * Walks a subtree of an Oak repository and reports binary properties whose
 * blob file is not present under a file-system datastore directory.
 *
 * Intended to be loaded into the oak-run console and invoked as
 * {@code new BlobChecker(datastorePath, outputAll).checkBlobs(session, path)}.
 *
 * NOTE(review): {@code SegmentBlob} is not imported by this file; it is
 * presumably provided by the console's default imports - confirm for the Oak
 * version in use.
 */
public class BlobChecker {
    /** Root directory of the datastore that blob files are looked up in. */
    def dsPath = "./crx-quickstart/repository/repository/datastore";

    /** When true, blobs that were found are printed as well as missing ones. */
    def outputAll = false;

    /**
     * @param datastorePath path of the datastore root directory
     * @param outputAll     true to also print blobs that exist on disk
     * @throws IllegalArgumentException if the datastore directory is null or does not exist
     */
    public BlobChecker(String datastorePath, boolean outputAll) {
        dsPath = datastorePath;
        this.outputAll = outputAll;
        if (datastorePath == null || !(new File(datastorePath).exists())) {
            throw new IllegalArgumentException("Datastore folder doesn't exist: " + datastorePath);
        }
    }

    /**
     * Checks every binary property at and below the given path.
     *
     * @param session console session; its {@code store}, {@code getWorkingPath()}
     *                and {@code refresh()} members are used
     * @param argpath absolute path, or a path relative to the session's working path
     */
    public checkBlobs(def session, def argpath) {
        if (!PathUtils.isValid(argpath)) {
            println("Not a valid path: " + argpath);
            return;
        }

        String path = PathUtils.isAbsolute(argpath) ?
                argpath : PathUtils.concat(session.getWorkingPath(), argpath);

        // Normalise the path: drop "." elements and resolve ".." against the
        // elements collected so far.
        List<String> elements = Lists.newArrayList();
        PathUtils.elements(path).each { String element ->
            if (PathUtils.denotesParent(element)) {
                if (!elements.isEmpty()) {
                    elements.remove(elements.size() - 1);
                }
            } else if (!PathUtils.denotesCurrent(element)) {
                elements.add(element);
            }
        }

        // Descend from the root node state to the requested node.
        NodeStore nstore = session.store
        def ns = nstore.root
        def nodeName;
        elements.each {
            if (it.size() > 0) {
                ns = ns.getChildNode(it)
                nodeName = it;
            }
        }

        // Report against the resolved absolute path rather than the raw
        // argument, so relative and ".." arguments produce correct node paths.
        String startPath = "/" + elements.join("/");
        if (!ns.exists()) {
            println("Path not found: " + startPath);
            return;
        }

        checkBlobsRecurse(session, nstore, startPath, ns, nodeName);
        session.refresh();
    }

    /**
     * Checks the binary properties of {@code ns}, then recurses into each child node.
     *
     * @param session   console session, passed through unchanged
     * @param nodeStore node store, passed through unchanged
     * @param curPath   path of {@code ns}, used in the printed report
     * @param ns        node state to inspect
     * @param nodeName  name of {@code ns}; not used by the check itself
     */
    private checkBlobsRecurse(def session, def nodeStore, def curPath, def ns, def nodeName) {
        ns.getProperties().each { prop ->
            try {
                String typeName = prop.getType().toString();
                if ("BINARIES".equals(typeName)) {
                    // Multi-valued binary property: check each value separately.
                    for (def i = 0; i < prop.count(); i++) {
                        String blobId = new SegmentBlob(prop.getValue(org.apache.jackrabbit.oak.api.Type.BINARY, i).getRecordId()).getBlobId();
                        checkBlob(blobId, curPath, prop);
                    }
                } else if ("BINARY".equals(typeName)) {
                    String blobId = new SegmentBlob(prop.getRecordId()).getBlobId();
                    checkBlob(blobId, curPath, prop);
                }
            } catch (Exception e) {
                // Best effort: report the failure with its location and carry on
                // with the remaining properties.
                println("Error checking " + curPath + " property: @" + prop.getName() + " - " + e);
            }
        }

        ns.getChildNodeEntries().each { entry ->
            // PathUtils.concat avoids the "//name" that plain string
            // concatenation produces when curPath is the root "/".
            checkBlobsRecurse(session, nodeStore, PathUtils.concat(curPath, entry.getName()),
                    entry.getNodeState(), entry.getName())
        }
    }

    /**
     * Looks for the file backing {@code blobId} under the datastore directory
     * and prints a line when it is missing (or always, when outputAll is set).
     *
     * The file is expected at {@code dsPath/aa/bb/cc/<first 40 chars of blobId>},
     * where aa, bb and cc are the first three character pairs of the id.
     *
     * @param blobId  blob identifier; null ids are ignored
     * @param curPath path of the node that owns the property
     * @param prop    property that references the blob
     */
    private checkBlob(def blobId, def curPath, def prop) {
        if (blobId == null) {
            return;
        }
        if (blobId.length() < 40) {
            // Too short to map to a datastore file name; report it instead of
            // failing on substring() and aborting the remaining values.
            println("Unexpected blobId format for node: " + curPath + " property: @" +
                    prop.getName() + " - blobId: " + blobId);
            return;
        }

        String blobFileName = blobId.substring(0, 40);
        File f = new File(dsPath + "/" + blobId.substring(0, 2) + "/" + blobId.substring(2, 4) + "/" + blobId.substring(4, 6) + "/" + blobFileName);
        String details = curPath + " property: @" + prop.getName() + " - blobId: " +
                blobId + ", filename=" + blobFileName;
        if (!f.exists()) {
            print("MISSING BLOB! - " + f.getPath() + " for node: ");
            println(details);
        } else if (outputAll) {
            println(details);
        }
    }
}
  1. Download the oak-run jar whose version matches the Oak version installed in your AEM instance from https://repo1.maven.org/maven2/org/apache/jackrabbit/oak-run/
  2. Stop all AEM instances in the cluster
  3. Upload the oak-run jar to the AEM server
  4. Run this command to start the oak console
  • on TarMK:
   java -Xmx4g -jar target/oak-run.jar console --quiet /path/to/segmentstore
  • For MongoMK run this:
   java -Xmx4g -jar oak-run.jar console mongodb://localhost/aem-author
  5. Run the following commands to perform the missing-blob check on the out-of-the-box AEM 6.1 Lucene index paths. Note that you need to specify the full path of your repository's datastore.
:load checkBlobs.groovy
new BlobChecker("/opt/aem6/author/crx-quickstart/repository/repository/datastore", false).checkBlobs(session, "/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment