Skip to content

Instantly share code, notes, and snippets.

@chetanmeh
Last active March 12, 2022 15:40
Show Gist options
  • Save chetanmeh/b05b9fbbe6078a4b9840eaf62f05bf9a to your computer and use it in GitHub Desktop.
Save chetanmeh/b05b9fbbe6078a4b9840eaf62f05bf9a to your computer and use it in GitHub Desktop.
Lucene index files details
import groovy.json.JsonOutput
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
import org.apache.jackrabbit.oak.spi.state.NodeState
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.slf4j.LoggerFactory
// Directory under which the dump is written. The path is relative, so it is
// resolved against the JVM's working directory.
def dumpDirPath = 'crx-quickstart/logs'

//~-------------------------------< Global Fields >
// `log` is assigned without `def`/type so that it is stored in the script
// binding; that is how logMsg() below can reference it.
log = LoggerFactory.getLogger("script-console");
NodeStore ns = getStore()

def dumpDir = new File(dumpDirPath)
// Make sure the target directory exists: withWriter cannot create missing
// parent directories and would fail with FileNotFoundException otherwise.
dumpDir.mkdirs()
File idxData = new File(dumpDir, "index-data.json")

// Collect results for the indexes under /oak:index and /content/oak:index
// into one map keyed by index name (a later entry with the same name
// replaces an earlier one).
NodeState oakIndex = ns.root.getChildNode("oak:index")
def output = [:]
checkIndexContent(oakIndex, output)
checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

// Serialize the collected map as pretty-printed JSON.
idxData.withWriter {w ->
    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
}
println("Output written to ${idxData.absolutePath}")
/**
 * Inspects the index definitions found directly under {@code oakIndex} and
 * records, for each index named in {@code badIndexes}, the blobs of every file
 * node below its {@code :data} child.
 *
 * For each blob the method tries to open (and immediately close) a stream.
 * A blob whose stream cannot be opened is recorded as invalid and marks the
 * whole index as invalid. Blobs that open fine and whose content identity
 * starts with '0x' are not recorded at all.
 *
 * @param oakIndex node whose children are the index definitions to inspect
 * @param output   map that receives one entry per processed index:
 *                 {@code [files: [fileName: [[blobId, valid, key], ...]], valid: boolean]}
 */
def checkIndexContent(NodeState oakIndex, output) {
    // Assigned without `def`, so the list is stored in the script binding.
    badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']
    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
        NodeState idxState = cne.nodeState
        def idxName = cne.name
        // Only the listed indexes are inspected. The former "skip 'lucene'"
        // branch was unreachable ('lucene' is not in badIndexes, so it was
        // already filtered out here) and has been removed.
        if (!badIndexes.contains(idxName)){
            return
        }
        logMsg("Processing ${idxName}")
        def fileData = [:]
        boolean allFileValid = true
        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
            String fileName = data.name
            def blobIds = []
            fileData[fileName] = blobIds
            // getProperty returns null when the property is absent; skip such
            // nodes instead of aborting the whole dump with an NPE.
            def dataProp = data.nodeState.getProperty('jcr:data')
            if (dataProp == null) {
                logMsg("No jcr:data property found for ${idxName}/${fileName}")
                return
            }
            def binaries = dataProp.getValue(Type.BINARIES)
            binaries.each { Blob b ->
                String blobId = b.contentIdentity
                boolean valid
                try {
                    // Opening the stream is the validity probe; the content
                    // itself is not read.
                    InputStream is = b.newStream
                    is?.close()
                    valid = true
                } catch (Exception ignore) {
                    // Deliberately not rethrown: the failure is reported
                    // through the `valid` flags in the output.
                    valid = false
                    allFileValid = false
                }
                // NOTE(review): valid blobs with a '0x'-prefixed id are left
                // out of the report; presumably these are not of interest for
                // the analysis - confirm.
                if (valid && blobId && blobId.startsWith('0x')){
                    return
                }
                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
            }
        }
        output[idxName] = [files: fileData, valid: allFileValid]
    }
}
/**
 * Reports a message twice: on the script's standard output and, at INFO
 * level, through the `log` logger held in the script binding.
 *
 * @param msg text to report
 */
def logMsg(String msg){
    println(msg)
    log.info(msg)
}
/**
 * Looks up the SlingRepository service through the `osgi` variable
 * (not defined in this script; presumably supplied by the script console)
 * and returns the `store` reached through the repository's `manager`.
 * The caller assigns the result to a NodeStore.
 */
def getStore(){
    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
    def repositoryManager = repository.manager
    return repositoryManager.store
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package felixconsole
import groovy.json.JsonOutput
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
import org.apache.jackrabbit.oak.spi.state.NodeState
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.slf4j.LoggerFactory
// Directory under which the dump is written. The path is relative, so it is
// resolved against the JVM's working directory.
def dumpDirPath = 'crx-quickstart/logs'

//~-------------------------------< Global Fields >
// These are assigned without `def`/type so that they are stored in the script
// binding and can be referenced from the methods defined below.
log = LoggerFactory.getLogger("script-console");
ns = getStore()
RevisionVector = loadClass('org.apache.jackrabbit.oak.plugins.document.RevisionVector')
// Must be set BEFORE checkIndexContent is called: the method reads this
// binding variable, and reading it while unset raises MissingPropertyException.
badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']

def dumpDir = new File(dumpDirPath)
// Make sure the target directory exists: withWriter cannot create missing
// parent directories and would fail with FileNotFoundException otherwise.
dumpDir.mkdirs()
File idxData = new File(dumpDir, "index-data-old.json")

// /oak:index is read from the root at the fixed old revision (see getOldRoot).
NodeState oakIndex = getOldRoot(ns).getChildNode("oak:index")
def output = [:]
checkIndexContent(oakIndex, output)
// NOTE(review): /content/oak:index is read from the CURRENT root, not from the
// old revision used above - confirm this is intended.
checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

// Serialize the collected map as pretty-printed JSON.
idxData.withWriter {w ->
    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
}
println("Output written to ${idxData.absolutePath}")
/**
 * Inspects the index definitions found directly under {@code oakIndex} and
 * records, for each index named in the binding variable {@code badIndexes},
 * the blobs of every file node below its {@code :data} child.
 *
 * For each blob the method tries to open (and immediately close) a stream.
 * A blob whose stream cannot be opened is recorded as invalid and marks the
 * whole index as invalid. Blobs that open fine and whose content identity
 * starts with '0x' are not recorded at all.
 *
 * @param oakIndex node whose children are the index definitions to inspect
 * @param output   map that receives one entry per processed index:
 *                 {@code [files: [fileName: [[blobId, valid, key], ...]], valid: boolean]}
 */
def checkIndexContent(NodeState oakIndex, output) {
    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
        NodeState idxState = cne.nodeState
        def idxName = cne.name
        // Only the indexes listed in badIndexes are inspected.
        if (!badIndexes.contains(idxName)){
            return
        }
        logMsg("Processing ${idxName}")
        def fileData = [:]
        boolean allFileValid = true
        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
            String fileName = data.name
            def blobIds = []
            fileData[fileName] = blobIds
            // getProperty returns null when the property is absent; skip such
            // nodes instead of aborting the whole dump with an NPE.
            def dataProp = data.nodeState.getProperty('jcr:data')
            if (dataProp == null) {
                logMsg("No jcr:data property found for ${idxName}/${fileName}")
                return
            }
            def binaries = dataProp.getValue(Type.BINARIES)
            binaries.each { Blob b ->
                String blobId = b.contentIdentity
                boolean valid
                try {
                    // Opening the stream is the validity probe; the content
                    // itself is not read.
                    InputStream is = b.newStream
                    is?.close()
                    valid = true
                } catch (Exception ignore) {
                    // Deliberately not rethrown: the failure is reported
                    // through the `valid` flags in the output.
                    valid = false
                    allFileValid = false
                }
                // NOTE(review): valid blobs with a '0x'-prefixed id are left
                // out of the report; presumably these are not of interest for
                // the analysis - confirm.
                if (valid && blobId && blobId.startsWith('0x')){
                    return
                }
                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
            }
        }
        output[idxName] = [files: fileData, valid: allFileValid]
    }
}
/**
 * Reports a message twice: on the script's standard output and, at INFO
 * level, through the `log` logger held in the script binding.
 *
 * @param msg text to report
 */
def logMsg(String msg){
    println(msg)
    log.info(msg)
}
/**
 * Looks up the SlingRepository service through the `osgi` variable
 * (not defined in this script; presumably supplied by the script console)
 * and returns the `store` reached through the repository's `manager`.
 */
def getStore(){
    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
    def repositoryManager = repository.manager
    return repositoryManager.store
}
/**
 * Returns the root node state of {@code nodeStore} at a fixed, hard-coded
 * revision vector.
 *
 * The revision string is parsed with the RevisionVector class that the script
 * loads reflectively into its binding.
 *
 * @param nodeStore store to read from; it must offer a getRoot(RevisionVector)
 *                  method, which is resolved dynamically at runtime
 * @return the root state at the hard-coded revision
 */
NodeState getOldRoot(NodeStore nodeStore) {
    String rev = 'r159b54ef411-0-1,r159b54ef411-0-2,r159b54ef411-0-3,r159b54ef411-0-4'
    def parsedRevision = RevisionVector.fromString(rev)
    NodeState oldRoot = nodeStore.getRoot(parsedRevision)
    // Fail loudly if the store returned nothing for that revision.
    assert oldRoot : "No state found at checkpoint $rev"
    oldRoot
}
/**
 * Loads a class by name through the class loader of the node store instance
 * held in the binding variable `ns`.
 *
 * @param className fully qualified name of the class to load
 * @return the loaded class
 */
Class loadClass(String className){
    ClassLoader storeLoader = ns.getClass().getClassLoader()
    storeLoader.loadClass(className)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment