chetanmeh/listIndexFiles.groovy

## listIndexFiles.groovy
import groovy.json.JsonOutput
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
import org.apache.jackrabbit.oak.spi.state.NodeState
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.slf4j.LoggerFactory

// Directory (resolved against the JVM working directory) that receives the JSON dump.
def dumpDirPath = 'crx-quickstart/logs'

//~-------------------------------< Global Fields >

// Assigned without 'def' on purpose: the logger has to live in the script binding,
// because logMsg() below reads it and script-local variables are not visible in methods.
log = LoggerFactory.getLogger("script-console")
NodeStore ns = getStore()

def dumpDir = new File(dumpDirPath)
// Fail fast if the target directory cannot be used. Without this check the script
// would scan every index first and only then die in withWriter with a
// FileNotFoundException, throwing the collected result away.
if (!dumpDir.isDirectory() && !dumpDir.mkdirs()) {
    throw new IllegalStateException("Cannot create dump directory ${dumpDir.absolutePath}")
}
File idxData = new File(dumpDir, "index-data.json")

// Report that checkIndexContent() fills in, keyed by index name.
def output = [:]

// Inspect the indexes below /oak:index and below /content/oak:index.
NodeState oakIndex = ns.root.getChildNode("oak:index")
checkIndexContent(oakIndex, output)
checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

// Persist the report as pretty-printed JSON; withWriter closes the writer for us.
idxData.withWriter { w ->
    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
}
println("Output written to ${idxData.absolutePath}")
/**
 * Walks the children of an oak:index node and, for each index named in the local
 * allow-list, records the blobs referenced by the files under its ':data' child.
 *
 * Every inspected index adds one entry to {@code output}:
 * {@code output[indexName] = [files: [fileName: [[blobId:, valid:, key:], ...]], valid: boolean]}.
 * A blob is considered valid when a stream can be opened on it. Valid blobs whose id
 * starts with '0x' are not listed. The index-level {@code valid} flag turns false as
 * soon as one blob cannot be opened.
 *
 * @param oakIndex node state whose child nodes are the indexes to inspect
 * @param output   map that is mutated in place with one entry per inspected index
 */
def checkIndexContent(NodeState oakIndex, output) {
    // Declared with 'def' so the list stays local instead of becoming a binding global.
    def badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']
    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
        NodeState idxState = cne.nodeState
        def idxName = cne.name
        // 'return' inside an each-closure only skips the current entry. No extra skip
        // for an index named 'lucene' is needed: it is not part of badIndexes, so this
        // filter already rejects it.
        if (!badIndexes.contains(idxName)) {
            return
        }

        logMsg("Processing ${idxName}")
        def fileData = [:]

        boolean allFileValid = true
        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
            String fileName = data.name
            def blobIds = []
            fileData[fileName] = blobIds

            // getProperty() yields null when the node carries no jcr:data. Report the
            // file with an empty blob list instead of aborting the whole scan with a
            // NullPointerException.
            def dataProp = data.nodeState.getProperty('jcr:data')
            if (dataProp == null) {
                logMsg("No jcr:data found for ${idxName}/${fileName}")
                return
            }

            dataProp.getValue(Type.BINARIES).each { Blob b ->
                String blobId = b.contentIdentity
                boolean valid
                try {
                    // Opening the stream is the actual check; the content is not read.
                    InputStream is = b.newStream
                    is?.close()
                    valid = true
                } catch (Exception ignore) {
                    // Any failure to open the blob marks it, and the index, as invalid.
                    valid = false
                    allFileValid = false
                }

                // Valid blobs with a '0x' id are deliberately left out of the report.
                if (valid && blobId && blobId.startsWith('0x')) {
                    return
                }

                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
            }
        }

        output[idxName] = [files: fileData, valid: allFileValid]
    }
}


/**
 * Writes the message to the script output and, at INFO level, to the logger held
 * in the 'log' binding variable.
 *
 * @param msg text to emit
 */
def logMsg(String msg) {
    println(msg)
    log.info(msg)
}

/**
 * Looks up the SlingRepository OSGi service through the 'osgi' binding variable and
 * returns the 'store' of its 'manager' (the caller treats the result as a NodeStore).
 */
def getStore() {
    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
    return repository.manager.store
}


## listIndexFiles2.groovy
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package felixconsole

import groovy.json.JsonOutput
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.api.Type
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
import org.apache.jackrabbit.oak.spi.state.NodeState
import org.apache.jackrabbit.oak.spi.state.NodeStore
import org.slf4j.LoggerFactory

// Directory (resolved against the JVM working directory) that receives the JSON dump.
def dumpDirPath = 'crx-quickstart/logs'

//~-------------------------------< Global Fields >

// The variables in this section are assigned without 'def' on purpose: they must live
// in the script binding because the methods below (logMsg, loadClass, getOldRoot,
// checkIndexContent) read them, and script-local variables are not visible in methods.
log = LoggerFactory.getLogger("script-console")
ns = getStore()

RevisionVector = loadClass('org.apache.jackrabbit.oak.plugins.document.RevisionVector')

// Has to be in the binding BEFORE checkIndexContent() is called; assigning it after
// the calls makes the first index entry fail with a MissingPropertyException.
badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']

def dumpDir = new File(dumpDirPath)
// Fail fast if the target directory cannot be used. Without this check the script
// would scan every index first and only then die in withWriter with a
// FileNotFoundException, throwing the collected result away.
if (!dumpDir.isDirectory() && !dumpDir.mkdirs()) {
    throw new IllegalStateException("Cannot create dump directory ${dumpDir.absolutePath}")
}
File idxData = new File(dumpDir, "index-data-old.json")

// Report that checkIndexContent() fills in, keyed by index name.
def output = [:]

// /oak:index is read from the old root returned by getOldRoot().
NodeState oakIndex = getOldRoot(ns).getChildNode("oak:index")
checkIndexContent(oakIndex, output)
// NOTE(review): /content/oak:index is read from the CURRENT root, not from the old
// root used above - confirm that this mix is intended.
checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

// Persist the report as pretty-printed JSON; withWriter closes the writer for us.
idxData.withWriter { w ->
    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
}
println("Output written to ${idxData.absolutePath}")
/**
 * Walks the children of an oak:index node and, for each index named in the
 * 'badIndexes' binding variable, records the blobs referenced by the files under its
 * ':data' child. 'badIndexes' must be assigned in the script binding before this
 * method is called.
 *
 * Every inspected index adds one entry to {@code output}:
 * {@code output[indexName] = [files: [fileName: [[blobId:, valid:, key:], ...]], valid: boolean]}.
 * A blob is considered valid when a stream can be opened on it. Valid blobs whose id
 * starts with '0x' are not listed. The index-level {@code valid} flag turns false as
 * soon as one blob cannot be opened.
 *
 * @param oakIndex node state whose child nodes are the indexes to inspect
 * @param output   map that is mutated in place with one entry per inspected index
 */
def checkIndexContent(NodeState oakIndex, output) {
    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
        NodeState idxState = cne.nodeState
        def idxName = cne.name
        // 'return' inside an each-closure only skips the current entry.
        if (!badIndexes.contains(idxName)) {
            return
        }

        logMsg("Processing ${idxName}")
        def fileData = [:]

        boolean allFileValid = true
        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
            String fileName = data.name
            def blobIds = []
            fileData[fileName] = blobIds

            // getProperty() yields null when the node carries no jcr:data. Report the
            // file with an empty blob list instead of aborting the whole scan with a
            // NullPointerException.
            def dataProp = data.nodeState.getProperty('jcr:data')
            if (dataProp == null) {
                logMsg("No jcr:data found for ${idxName}/${fileName}")
                return
            }

            dataProp.getValue(Type.BINARIES).each { Blob b ->
                String blobId = b.contentIdentity
                boolean valid
                try {
                    // Opening the stream is the actual check; the content is not read.
                    InputStream is = b.newStream
                    is?.close()
                    valid = true
                } catch (Exception ignore) {
                    // Any failure to open the blob marks it, and the index, as invalid.
                    valid = false
                    allFileValid = false
                }

                // Valid blobs with a '0x' id are deliberately left out of the report.
                if (valid && blobId && blobId.startsWith('0x')) {
                    return
                }

                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
            }
        }

        output[idxName] = [files: fileData, valid: allFileValid]
    }
}


/**
 * Writes the message to the script output and, at INFO level, to the logger held
 * in the 'log' binding variable.
 *
 * @param msg text to emit
 */
def logMsg(String msg) {
    println(msg)
    log.info(msg)
}

/**
 * Looks up the SlingRepository OSGi service through the 'osgi' binding variable and
 * returns the 'store' of its 'manager' (the script passes the result on as a NodeStore).
 */
def getStore() {
    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
    return repository.manager.store
}


/**
 * Returns the root node state of the given store at a fixed, hard-coded revision
 * vector. The vector is parsed with the RevisionVector class held in the script
 * binding.
 *
 * @param nodeStore store to read from; must offer getRoot(RevisionVector)
 * @return the root state at that revision; the assert fails if none is returned
 */
NodeState getOldRoot(NodeStore nodeStore) {
    String rev = 'r159b54ef411-0-1,r159b54ef411-0-2,r159b54ef411-0-3,r159b54ef411-0-4'
    def revisionVector = RevisionVector.fromString(rev)
    // The local is named 'state' on purpose: Groovy includes the asserted expression
    // in the failure text, so the name is part of the error message.
    NodeState state = nodeStore.getRoot(revisionVector)
    assert state : "No state found at checkpoint $rev"
    state
}

/**
 * Loads a class by name through the class loader of the object held in the 'ns'
 * binding variable.
 *
 * @param className fully qualified name of the class to load
 * @return the loaded class
 */
Class loadClass(String className) {
    ClassLoader storeLoader = ns.class.classLoader
    storeLoader.loadClass(className)
}
	import groovy.json.JsonOutput
	import org.apache.jackrabbit.oak.api.Blob
	import org.apache.jackrabbit.oak.api.Type
	import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
	import org.apache.jackrabbit.oak.spi.state.NodeState
	import org.apache.jackrabbit.oak.spi.state.NodeStore
	import org.slf4j.LoggerFactory

	// Directory (resolved against the JVM working directory) that receives the JSON dump.
	def dumpDirPath = 'crx-quickstart/logs'

	//~-------------------------------< Global Fields >

	// Assigned without 'def' on purpose: the logger has to live in the script binding,
	// because logMsg() below reads it and script-local variables are not visible in methods.
	log = LoggerFactory.getLogger("script-console")
	NodeStore ns = getStore()

	def dumpDir = new File(dumpDirPath)
	// Fail fast if the target directory cannot be used. Without this check the script
	// would scan every index first and only then die in withWriter with a
	// FileNotFoundException, throwing the collected result away.
	if (!dumpDir.isDirectory() && !dumpDir.mkdirs()) {
	    throw new IllegalStateException("Cannot create dump directory ${dumpDir.absolutePath}")
	}
	File idxData = new File(dumpDir, "index-data.json")

	// Report that checkIndexContent() fills in, keyed by index name.
	def output = [:]

	// Inspect the indexes below /oak:index and below /content/oak:index.
	NodeState oakIndex = ns.root.getChildNode("oak:index")
	checkIndexContent(oakIndex, output)
	checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

	// Persist the report as pretty-printed JSON; withWriter closes the writer for us.
	idxData.withWriter { w ->
	    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
	}
	println("Output written to ${idxData.absolutePath}")
	/**
	 * Walks the children of an oak:index node and, for each index named in the local
	 * allow-list, records the blobs referenced by the files under its ':data' child.
	 *
	 * Every inspected index adds one entry to {@code output}:
	 * {@code output[indexName] = [files: [fileName: [[blobId:, valid:, key:], ...]], valid: boolean]}.
	 * A blob is considered valid when a stream can be opened on it. Valid blobs whose id
	 * starts with '0x' are not listed. The index-level {@code valid} flag turns false as
	 * soon as one blob cannot be opened.
	 *
	 * @param oakIndex node state whose child nodes are the indexes to inspect
	 * @param output   map that is mutated in place with one entry per inspected index
	 */
	def checkIndexContent(NodeState oakIndex, output) {
	    // Declared with 'def' so the list stays local instead of becoming a binding global.
	    def badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']
	    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
	        NodeState idxState = cne.nodeState
	        def idxName = cne.name
	        // 'return' inside an each-closure only skips the current entry. No extra skip
	        // for an index named 'lucene' is needed: it is not part of badIndexes, so this
	        // filter already rejects it.
	        if (!badIndexes.contains(idxName)) {
	            return
	        }

	        logMsg("Processing ${idxName}")
	        def fileData = [:]

	        boolean allFileValid = true
	        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
	            String fileName = data.name
	            def blobIds = []
	            fileData[fileName] = blobIds

	            // getProperty() yields null when the node carries no jcr:data. Report the
	            // file with an empty blob list instead of aborting the whole scan with a
	            // NullPointerException.
	            def dataProp = data.nodeState.getProperty('jcr:data')
	            if (dataProp == null) {
	                logMsg("No jcr:data found for ${idxName}/${fileName}")
	                return
	            }

	            dataProp.getValue(Type.BINARIES).each { Blob b ->
	                String blobId = b.contentIdentity
	                boolean valid
	                try {
	                    // Opening the stream is the actual check; the content is not read.
	                    InputStream is = b.newStream
	                    is?.close()
	                    valid = true
	                } catch (Exception ignore) {
	                    // Any failure to open the blob marks it, and the index, as invalid.
	                    valid = false
	                    allFileValid = false
	                }

	                // Valid blobs with a '0x' id are deliberately left out of the report.
	                if (valid && blobId && blobId.startsWith('0x')) {
	                    return
	                }

	                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
	            }
	        }

	        output[idxName] = [files: fileData, valid: allFileValid]
	    }
	}



	/**
	 * Writes the message to the script output and, at INFO level, to the logger held
	 * in the 'log' binding variable.
	 *
	 * @param msg text to emit
	 */
	def logMsg(String msg) {
	    println(msg)
	    log.info(msg)
	}

	/**
	 * Looks up the SlingRepository OSGi service through the 'osgi' binding variable and
	 * returns the 'store' of its 'manager' (the caller treats the result as a NodeStore).
	 */
	def getStore() {
	    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
	    return repository.manager.store
	}
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package felixconsole

	import groovy.json.JsonOutput
	import org.apache.jackrabbit.oak.api.Blob
	import org.apache.jackrabbit.oak.api.Type
	import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry
	import org.apache.jackrabbit.oak.spi.state.NodeState
	import org.apache.jackrabbit.oak.spi.state.NodeStore
	import org.slf4j.LoggerFactory

	// Directory (resolved against the JVM working directory) that receives the JSON dump.
	def dumpDirPath = 'crx-quickstart/logs'

	//~-------------------------------< Global Fields >

	// The variables in this section are assigned without 'def' on purpose: they must live
	// in the script binding because the methods below (logMsg, loadClass, getOldRoot,
	// checkIndexContent) read them, and script-local variables are not visible in methods.
	log = LoggerFactory.getLogger("script-console")
	ns = getStore()

	RevisionVector = loadClass('org.apache.jackrabbit.oak.plugins.document.RevisionVector')

	// Has to be in the binding BEFORE checkIndexContent() is called; assigning it after
	// the calls makes the first index entry fail with a MissingPropertyException.
	badIndexes = ['damAssetLucene', 'hpeAuthorDamAsset', 'hpeSlingResource']

	def dumpDir = new File(dumpDirPath)
	// Fail fast if the target directory cannot be used. Without this check the script
	// would scan every index first and only then die in withWriter with a
	// FileNotFoundException, throwing the collected result away.
	if (!dumpDir.isDirectory() && !dumpDir.mkdirs()) {
	    throw new IllegalStateException("Cannot create dump directory ${dumpDir.absolutePath}")
	}
	File idxData = new File(dumpDir, "index-data-old.json")

	// Report that checkIndexContent() fills in, keyed by index name.
	def output = [:]

	// /oak:index is read from the old root returned by getOldRoot().
	NodeState oakIndex = getOldRoot(ns).getChildNode("oak:index")
	checkIndexContent(oakIndex, output)
	// NOTE(review): /content/oak:index is read from the CURRENT root, not from the old
	// root used above - confirm that this mix is intended.
	checkIndexContent(ns.root.getChildNode("content").getChildNode("oak:index"), output)

	// Persist the report as pretty-printed JSON; withWriter closes the writer for us.
	idxData.withWriter { w ->
	    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
	}
	println("Output written to ${idxData.absolutePath}")
	/**
	 * Walks the children of an oak:index node and, for each index named in the
	 * 'badIndexes' binding variable, records the blobs referenced by the files under its
	 * ':data' child. 'badIndexes' must be assigned in the script binding before this
	 * method is called.
	 *
	 * Every inspected index adds one entry to {@code output}:
	 * {@code output[indexName] = [files: [fileName: [[blobId:, valid:, key:], ...]], valid: boolean]}.
	 * A blob is considered valid when a stream can be opened on it. Valid blobs whose id
	 * starts with '0x' are not listed. The index-level {@code valid} flag turns false as
	 * soon as one blob cannot be opened.
	 *
	 * @param oakIndex node state whose child nodes are the indexes to inspect
	 * @param output   map that is mutated in place with one entry per inspected index
	 */
	def checkIndexContent(NodeState oakIndex, output) {
	    oakIndex.childNodeEntries.each { ChildNodeEntry cne ->
	        NodeState idxState = cne.nodeState
	        def idxName = cne.name
	        // 'return' inside an each-closure only skips the current entry.
	        if (!badIndexes.contains(idxName)) {
	            return
	        }

	        logMsg("Processing ${idxName}")
	        def fileData = [:]

	        boolean allFileValid = true
	        idxState.getChildNode(':data').childNodeEntries.each { ChildNodeEntry data ->
	            String fileName = data.name
	            def blobIds = []
	            fileData[fileName] = blobIds

	            // getProperty() yields null when the node carries no jcr:data. Report the
	            // file with an empty blob list instead of aborting the whole scan with a
	            // NullPointerException.
	            def dataProp = data.nodeState.getProperty('jcr:data')
	            if (dataProp == null) {
	                logMsg("No jcr:data found for ${idxName}/${fileName}")
	                return
	            }

	            dataProp.getValue(Type.BINARIES).each { Blob b ->
	                String blobId = b.contentIdentity
	                boolean valid
	                try {
	                    // Opening the stream is the actual check; the content is not read.
	                    InputStream is = b.newStream
	                    is?.close()
	                    valid = true
	                } catch (Exception ignore) {
	                    // Any failure to open the blob marks it, and the index, as invalid.
	                    valid = false
	                    allFileValid = false
	                }

	                // Valid blobs with a '0x' id are deliberately left out of the report.
	                if (valid && blobId && blobId.startsWith('0x')) {
	                    return
	                }

	                blobIds << [blobId: blobId, valid: valid, key: data.nodeState.getString('uniqueKey')]
	            }
	        }

	        output[idxName] = [files: fileData, valid: allFileValid]
	    }
	}



	/**
	 * Writes the message to the script output and, at INFO level, to the logger held
	 * in the 'log' binding variable.
	 *
	 * @param msg text to emit
	 */
	def logMsg(String msg) {
	    println(msg)
	    log.info(msg)
	}

	/**
	 * Looks up the SlingRepository OSGi service through the 'osgi' binding variable and
	 * returns the 'store' of its 'manager' (the script passes the result on as a NodeStore).
	 */
	def getStore() {
	    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
	    return repository.manager.store
	}


	/**
	 * Returns the root node state of the given store at a fixed, hard-coded revision
	 * vector. The vector is parsed with the RevisionVector class held in the script
	 * binding.
	 *
	 * @param nodeStore store to read from; must offer getRoot(RevisionVector)
	 * @return the root state at that revision; the assert fails if none is returned
	 */
	NodeState getOldRoot(NodeStore nodeStore) {
	    String rev = 'r159b54ef411-0-1,r159b54ef411-0-2,r159b54ef411-0-3,r159b54ef411-0-4'
	    def revisionVector = RevisionVector.fromString(rev)
	    // The local is named 'state' on purpose: Groovy includes the asserted expression
	    // in the failure text, so the name is part of the error message.
	    NodeState state = nodeStore.getRoot(revisionVector)
	    assert state : "No state found at checkpoint $rev"
	    state
	}

	/**
	 * Loads a class by name through the class loader of the object held in the 'ns'
	 * binding variable.
	 *
	 * @param className fully qualified name of the class to load
	 * @return the loaded class
	 */
	Class loadClass(String className) {
	    ClassLoader storeLoader = ns.class.classLoader
	    storeLoader.loadClass(className)
	}