Dump Document nodes and index files
package felixconsole
import org.apache.jackrabbit.oak.commons.IOUtils as OakIOUtils
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.commons.PathUtils
import org.slf4j.LoggerFactory
/**
 * This script extracts and dumps NodeDocument instances via the DocumentStore
 * API to a file, and optionally also copies the referenced binary files.
 * Once executed, the dumped data is stored under `dumpDirPath`
 */
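//Note: the script assumes it runs in the Apache Felix Script Console (Groovy),
//where the `osgi` binding is available, against a repository backed by a
//DocumentNodeStore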
//~------------------------------< config >
//NodeStore path whose content is to be dumped
def path = "/oak:index/lucene/:data"
//Directory path under which dump data would be written
def dumpDirPath = 'crx-quickstart/logs/dump'
def dumpFileName = "dump.txt"
//Set to true to also dump binary content
def dumpBinaries = true
//If true, the immediate child documents are also dumped
def includeChildDocs = true
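//For example, to dump a regular content subtree instead of the Lucene index
//data, `path` could point to a node like "/content/foo" (hypothetical path)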
//~-------------------------------< Global Fields >
log = LoggerFactory.getLogger("script-console")
ns = getStore()       //DocumentNodeStore
ds = ns.documentStore //DocumentStore
bs = ns.blobStore     //BlobStore
//~------------------------< Create refs to private / non-exported classes >
DocCollection = loadClass('org.apache.jackrabbit.oak.plugins.document.Collection')
Revision = loadClass('org.apache.jackrabbit.oak.plugins.document.Revision')
Utils = loadClass('org.apache.jackrabbit.oak.plugins.document.util.Utils')
blobCollector = loadClass('org.apache.jackrabbit.oak.plugins.document.BlobCollector').newInstance(ns)
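//These Oak classes are not exported by the oak-core bundle, hence they are
//loaded via the NodeStore's own classloader (see loadClass below).
//BlobCollector collects the Blob instances referenced from a NodeDocument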
def dumpDir = new File(dumpDirPath)
FileUtils.forceMkdir(dumpDir)
logMsg("Files would be copied to ${dumpDir.absolutePath}")
File dump = new File(dumpDir, dumpFileName)
def blobs = []
dump.withPrintWriter { pw ->
    dumpPathAndAncestors(path, pw, blobs)
    if (includeChildDocs) {
        ns.readChildDocs(path, null, Integer.MAX_VALUE).each { doc ->
            dumpDoc(doc, pw, blobs)
        }
    }
}
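//Copy each referenced binary to <dumpDir>/blobs, skipping inline and already
//processed blobs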
if (dumpBinaries) {
    def processedBlobs = new HashSet()
    def binDir = new File(dumpDir, "blobs")
    FileUtils.forceMkdir(binDir)
    def blobCount = 0, blobSize = 0
    blobs.each { /*ReferencedBlob*/ refBlob ->
        //Blob handling differs between Oak 1.0 and 1.2, so support both shapes
        def blob = refBlob instanceof Blob ? refBlob : refBlob.blob
        def id = null
        if (refBlob.class.name.endsWith('BlobStoreBlob')) {
            id = refBlob.blobId
        } else if (refBlob.class.name.endsWith('ReferencedBlob')) {
            id = refBlob.id
        }
        if (!id) {
            logMsg("Ignoring blob of type ${refBlob.class}")
            return //skip blobs whose id cannot be determined
        }
        if (processedBlobs.contains(id)) {
            return //already copied
        }
        try {
            def l = blob.length()
            //Only dump non-inline binaries i.e. those actually stored in the BlobStore
            if (l >= bs.blockSizeMin) {
                File f = new File(binDir, id)
                copyBlob(blob, f)
                blobCount++
                blobSize += l
                processedBlobs << id
            }
        } catch (Exception e) {
            println "Error on ${id}"
            log.warn("Failed on blob {}", id, e)
        }
    }
    logMsg("Copied $blobCount blobs of size ${OakIOUtils.humanReadableByteCount(blobSize)} " +
            "to ${binDir.absolutePath}")
}
logMsg("Dumped docs to ${dump.absolutePath}")
def dumpPathAndAncestors(String path, PrintWriter pw, def blobs) {
    def paths = []
    int depth = PathUtils.getDepth(path)
    (0..depth).each { d ->
        String ancestorPath = PathUtils.getAncestorPath(path, d)
        paths << ancestorPath
    }
    paths.each { String p ->
        logMsg("Dumping $p")
        dumpPath(p, pw, blobs)
    }
}
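//Looks up the NodeDocument for the given path by its id and dumps it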
def dumpPath(String path, PrintWriter pw, def blobs) {
    def id = Utils.getIdFromPath(path)
    def /*NodeDocument*/ doc = ds.find(DocCollection.NODES, id)
    assert doc : "No document found for [$id]"
    dumpDoc(doc, pw, blobs)
}
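//Writes the document and any split (previous) documents it refers to, and
//collects their blob references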
def dumpDoc(def doc, PrintWriter pw, def blobs) {
    pw.println(doc.asString())
    doc.getAllPreviousDocs().each { prevDoc ->
        pw.println(prevDoc.asString())
        blobCollector.collect(prevDoc, blobs)
    }
    blobCollector.collect(doc, blobs)
}
def logMsg(String msg) {
    println msg
    log.info(msg)
}
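//Obtains the NodeStore by reaching into the Sling repository internals. The
//`manager.store` chain assumes an Oak based SlingRepository implementation
//that exposes the backing NodeStore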
def getStore() {
    osgi.getService(org.apache.sling.jcr.api.SlingRepository.class).manager.store
}
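//Streams the blob content to the given file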
def copyBlob(Blob b, File outFile) {
    outFile.withOutputStream { os ->
        InputStream is = b.newStream
        try {
            IOUtils.copyLarge(is, os)
        } finally {
            is?.close()
        }
    }
}
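//Loads a class via the classloader of the NodeStore implementation so that
//non-exported Oak internals can be accessed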
Class loadClass(String className) {
    return ns.class.classLoader.loadClass(className)
}