Dump Document nodes and index files
package felixconsole
import org.apache.jackrabbit.oak.commons.IOUtils as OakIOUtils
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.commons.PathUtils
import org.slf4j.LoggerFactory
/**
 * This script extracts and dumps NodeDocument instances via the DocumentStore
 * API to a file, and optionally also copies the referenced binary files.
 * Once executed, the dumped data is stored under `dumpDirPath`
 */
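//Note: the script assumes it runs in the Apache Felix Script Console (Groovy),
//where the `osgi` binding is available, against a repository backed by a
//DocumentNodeStore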
//~------------------------------< config >
//NodeStore path whose content is to be dumped
def path = "/oak:index/lucene/:data"
//Directory path under which dump data would be written
def dumpDirPath = 'crx-quickstart/logs/dump'
def dumpFileName = "dump.txt"
//Set to true to also dump binary content
def dumpBinaries = true
//If true, the immediate child documents are also dumped
def includeChildDocs = true
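//For example, to dump a regular content subtree instead of the Lucene index
//data, `path` could point to a node like "/content/foo" (hypothetical path)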
//~-------------------------------< Global Fields >
log = LoggerFactory.getLogger("script-console")
ns = getStore()       //DocumentNodeStore
ds = ns.documentStore //DocumentStore
bs = ns.blobStore     //BlobStore
//~------------------------< Create refs to private / non-exported classes >
DocCollection = loadClass('org.apache.jackrabbit.oak.plugins.document.Collection')
Revision = loadClass('org.apache.jackrabbit.oak.plugins.document.Revision')
Utils = loadClass('org.apache.jackrabbit.oak.plugins.document.util.Utils')
blobCollector = loadClass('org.apache.jackrabbit.oak.plugins.document.BlobCollector').newInstance(ns)
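//These Oak classes are not exported by the oak-core bundle, hence they are
//loaded via the NodeStore's own classloader (see loadClass below).
//BlobCollector collects the Blob instances referenced from a NodeDocument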
def dumpDir = new File(dumpDirPath)
FileUtils.forceMkdir(dumpDir)
logMsg("Files would be copied to ${dumpDir.absolutePath}")
File dump = new File(dumpDir, dumpFileName)
def blobs = []
dump.withPrintWriter { pw ->
    dumpPathAndAncestors(path, pw, blobs)
    if (includeChildDocs) {
        ns.readChildDocs(path, null, Integer.MAX_VALUE).each { doc ->
            dumpDoc(doc, pw, blobs)
        }
    }
}
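//Copy each referenced binary to <dumpDir>/blobs, skipping inline and already
//processed blobs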
if (dumpBinaries) {
    def processedBlobs = new HashSet()
    def binDir = new File(dumpDir, "blobs")
    FileUtils.forceMkdir(binDir)
    def blobCount = 0, blobSize = 0
    blobs.each { /*ReferencedBlob*/ refBlob ->
        //Blob handling differs between Oak 1.0 and 1.2, so support both shapes
        def blob = refBlob instanceof Blob ? refBlob : refBlob.blob
        def id = null
        if (refBlob.class.name.endsWith('BlobStoreBlob')) {
            id = refBlob.blobId
        } else if (refBlob.class.name.endsWith('ReferencedBlob')) {
            id = refBlob.id
        }
        if (!id) {
            logMsg("Ignoring blob of type ${refBlob.class}")
            return //skip blobs whose id cannot be determined
        }
        if (processedBlobs.contains(id)) {
            return //already copied
        }
        try {
            def l = blob.length()
            //Only dump non-inline binaries i.e. those actually stored in the BlobStore
            if (l >= bs.blockSizeMin) {
                File f = new File(binDir, id)
                copyBlob(blob, f)
                blobCount++
                blobSize += l
                processedBlobs << id
            }
        } catch (Exception e) {
            println "Error on ${id}"
            log.warn("Failed on blob {}", id, e)
        }
    }
    logMsg("Copied $blobCount blobs of size ${OakIOUtils.humanReadableByteCount(blobSize)} " +
            "to ${binDir.absolutePath}")
}
logMsg("Dumped docs to ${dump.absolutePath}")
def dumpPathAndAncestors(String path, PrintWriter pw, def blobs) {
    def paths = []
    int depth = PathUtils.getDepth(path)
    (0..depth).each { d ->
        String ancestorPath = PathUtils.getAncestorPath(path, d)
        paths << ancestorPath
    }
    paths.each { String p ->
        logMsg("Dumping $p")
        dumpPath(p, pw, blobs)
    }
}
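//Looks up the NodeDocument for the given path by its id and dumps it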
def dumpPath(String path, PrintWriter pw, def blobs) {
    def id = Utils.getIdFromPath(path)
    def /*NodeDocument*/ doc = ds.find(DocCollection.NODES, id)
    assert doc : "No document found for [$id]"
    dumpDoc(doc, pw, blobs)
}
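//Writes the document and any split (previous) documents it refers to, and
//collects their blob references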
def dumpDoc(def doc, PrintWriter pw, def blobs) {
    pw.println(doc.asString())
    doc.getAllPreviousDocs().each { prevDoc ->
        pw.println(prevDoc.asString())
        blobCollector.collect(prevDoc, blobs)
    }
    blobCollector.collect(doc, blobs)
}
def logMsg(String msg) {
    println msg
    log.info(msg)
}
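//Obtains the NodeStore by reaching into the Sling repository internals. The
//`manager.store` chain assumes an Oak based SlingRepository implementation
//that exposes the backing NodeStore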
def getStore() {
    osgi.getService(org.apache.sling.jcr.api.SlingRepository.class).manager.store
}
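//Streams the blob content to the given file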
def copyBlob(Blob b, File outFile) {
    outFile.withOutputStream { os ->
        InputStream is = b.newStream
        try {
            IOUtils.copyLarge(is, os)
        } finally {
            is?.close()
        }
    }
}
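//Loads a class via the classloader of the NodeStore implementation so that
//non-exported Oak internals can be accessed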
Class loadClass(String className) {
    return ns.class.classLoader.loadClass(className)
}