Skip to content

Instantly share code, notes, and snippets.

@ctomc
Created September 18, 2017 09:57
Show Gist options
  • Save ctomc/83ea2528d7530f6790f009b3ebdd61b2 to your computer and use it in GitHub Desktop.
Save ctomc/83ea2528d7530f6790f009b3ebdd61b2 to your computer and use it in GitHub Desktop.
confluence to asciidoc
/*
* JBoss, Home of Professional Open Source
* Copyright 2017, Red Hat, Inc., and individual contributors as indicated
* by the @authors tag.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.21')
import org.htmlcleaner.*
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
// Input tree with the exported HTML pages and output tree for the generated AsciiDoc.
def src = Paths.get('html')
def dst = Paths.get('asciidoc')

// HtmlCleaner instance shared by every converted file.
HtmlCleaner cleaner = new HtmlCleaner()
def props = cleaner.getProperties()
props.setTranslateSpecialEntities(false)
props.setAdvancedXmlEscape(true)
props.setPruneTags("script")

// Currently unused: its registration is disabled, so <div> elements are left for the visitor below.
TagTransformation divTransformer = new TagTransformation("div")

// Register a single-argument (no destination tag) transformation for each structural/inline tag.
// NOTE(review): presumably this drops the tag itself while keeping its content - confirm in the HtmlCleaner docs.
// A heading shift (h1->h2 ... h5->h6) used to be registered here as well and is intentionally disabled.
CleanerTransformations transformations = new CleanerTransformations()
["thead", "tbody", "tfoot", "header", "span", "code"].each { String tagName ->
    transformations.addTransformation(new TagTransformation(tagName))
}
props.setCleanerTransformations(transformations)

// Serializer used to write the cleaned intermediate HTML that is handed to pandoc.
def serializer = new PrettyHtmlSerializer(props)
/**
 * Guesses the source-highlighting language of a code listing from keywords found in its text.
 *
 * The checks run in a fixed order and the first matching group wins:
 * xml, java, "ruby" (used for management CLI / DMR style output), bash,
 * a single key=value properties line (no language), log/URL-like output (no language), sql.
 *
 * @param sourceTag node whose text content is inspected
 * @return the language name to put on the listing, or an empty string when no language applies
 */
static String guessLanguage(TagNode sourceTag) {
    String source = sourceTag.getText()
    // true when the listing text contains at least one of the given markers
    def containsAny = { List<String> markers -> markers.any { String marker -> source.contains(marker) } }

    // A '<' only counts as XML when none of these Java hints are present (generics, comparisons...).
    List<String> javaHintsVetoingXml = ["void", "static ", ".class", "new ", "final "]

    List<String> javaMarkers = [
            "import", ".java", "java.", "assert", "Assert.", "new ", "class", "interface", "package",
            "private ", "public ", "static ", "String ", "Boolean ", "boolean ", "ModelNode ",
            "newInstance", "try", "void ", "catch", "@Test", "System.out", "new ModelNode(", "throw",
            ".close();", ".log", "@EJB", "@Stateless", "log.", "bsh ", ".resolveModelAttribute",
            ".parseAndSetParameter", "KernelServices ", ".registerOperationHandler",
            ".registerSubsystemModel", ".addChildResource", "AttributeConverter ",
            ".addOperationTransformationOverride", "attributeBuilder", "RejectAttributeChecker",
            "PathAddress ", "@Arquillian", "DiscardAttributeChecker", "PathElement", "Transformation"
    ]

    // Management CLI commands and their responses; these listings are tagged as "ruby".
    List<String> cliMarkers = [
            "outcome", ":write-attribute", ":add", ":undefine", ":read-", "standalone@", "domain@",
            '"operation"', '"children" =', '"address"', 'model-description', 'reload',
            'request-properties', 'reply-properties', 'value-type', '"attributes" =', '/subsystem=',
            '/deployment=', '"result" =', 'deploy ', '"rollout-plan" =', '"description" =',
            'deployment-overlay '
    ]

    // "@Test" used to be listed here too, but it can never reach this group: javaMarkers returns first.
    List<String> shellMarkers = [
            ".sh", ".bat", "cd ", "mvn ", "keytool", "grep", "cat", "##", " matches", "cp ", "ls ",
            "-D", "curl ", "sudo ", "vi ", "yum ", "git ", "rm "
    ]

    // Server log output, URLs and archive names: rendered as a plain listing.
    List<String> plainOutputMarkers = [
            "MSC service thread", "[stdout] ", "; legacy version: ", "(DeploymentScanner-threads: ",
            "DeploymentScanner-threads ", "http://", ".jar", ".war"
    ]

    if (source.contains("xmlns") || (source.contains("<") && !containsAny(javaHintsVetoingXml))) {
        return "xml"
    }
    if (containsAny(javaMarkers)) {
        return "java"
    }
    if (containsAny(cliMarkers)) {
        return "ruby"
    }
    if (containsAny(shellMarkers)) {
        return "bash"
    }
    // Whole text is one key=value pair. The pattern was previously double-escaped
    // ("\\\\w"), which made it look for a literal backslash and never match a property.
    if (source.matches("\\w+=[^\\s]+")) {
        println(" found props")
        return ""
    }
    if (containsAny(plainOutputMarkers)) {
        return ""
    }
    if (source.contains("CREATE TABLE")) {
        return "sql"
    }
    // Nothing recognised: leave the listing without a language.
    return ""
}
/**
 * Translates a Confluence admonition type into the matching AsciiDoc admonition label.
 *
 * Known types: note, tip, info, warning.
 *
 * @param confluenceType admonition type taken from the Confluence markup
 * @return the AsciiDoc label (NOTE, TIP, IMPORTANT or WARNING)
 * @throws RuntimeException when the type is not one of the known values
 */
static String getAdmotionMapping(String confluenceType) {
    switch (confluenceType) {
        case "note":
            return "NOTE"
        case "tip":
            return "TIP"
        case "info":
            // Confluence "info" boxes are rendered as IMPORTANT blocks.
            return "IMPORTANT"
        case "warning":
            return "WARNING"
        default:
            throw new RuntimeException("Could not map '" + confluenceType + "'")
    }
}
/*
 * Walks everything below `src` and, per entry:
 *  - directory: mirrors it below `dst`;
 *  - *.html:    strips syntax-highlighter markup, cleans the page with HtmlCleaner, rewrites
 *               tables / admonitions / panels / code listings, writes the cleaned page to
 *               clean/<relative path> and runs pandoc on it to produce an .adoc file;
 *  - other:     copies the file to the mirrored location if it is not there yet.
 */
// The walk never visits `src` itself, so the output root has to be created explicitly.
Files.createDirectories(dst)
src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // createDirectories also creates missing parents and does nothing for an existing directory.
        Files.createDirectories(target)
    } else if (f.name.endsWith('.html')) {
        def tmpHtml = Paths.get('clean', relative.toString())
        // The serializer does not create directories; make sure clean/<sub dirs> exists.
        if (tmpHtml.parent != null) {
            Files.createDirectories(tmpHtml.parent)
        }
        println "Converting $relative"
        String html = new String(Files.readAllBytes(f.toPath()), StandardCharsets.UTF_8)
        // Unwrap the per-token <code>/<div class="line"> elements produced by the syntax highlighter,
        // keeping only their content. Order matters: the bare <code> rule must run last.
        html = html
                .replaceAll("<code class=\\\"value\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"plain\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"comments\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"keyword\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"string\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"color1\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"preprocessor\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<div class=\\\"line\\\">(.*?)<\\/div>","\$1")
                .replaceAll("<code>(.*?)<\\/code>","\$1")
        def result = cleaner.clean(html)
        TagNodeVisitor visitor = { parent, htmlNode ->
            if (htmlNode instanceof TagNode) {
                TagNode tagNode = (TagNode) htmlNode
                // NOTE(review): if getAttributes() returns the node's live map, these two removals strip
                // class/id before the checks below read them - confirm against the HtmlCleaner version in use.
                tagNode?.attributes?.remove 'class'
                tagNode?.attributes?.remove 'id'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    // Flatten every table cell to a plain-text <td>.
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }
                if ('p' == tagNode?.name && 'title' == tagNode.attributes.id) {
                    tagNode.removeFromTree()
                }
                if ('ul' == tagNode?.name && 'docnav' == tagNode.getAttributeByName("class")) {
                    tagNode.removeFromTree()
                    print "ul processed"
                }
                if ('pre' == tagNode?.name) {
                    String className = tagNode.getAttributeByName("class")
                    if (className != null && className.contains("brush: java")) {
                        tagNode.removeAttribute("class")
                        tagNode.addAttribute("class", "java")
                    }
                }
                if ('div' == tagNode?.name) {
                    tagNode.removeAttribute("id")
                    String className = tagNode.getAttributeByName("class")
                    if (className != null) {
                        if (className.contains("admonition-body")) {
                            // The admonition level is the last '-'-separated part of the wrapper's class.
                            String parentClass = parent.getAttributeByName("class")
                            String adMotionLevel = parentClass.substring(parentClass.lastIndexOf("-") + 1)
                            // Move the body up in place of its wrapper, then drop the wrapper.
                            int idx = parent.getParent().getChildIndex(parent)
                            parent.getParent().insertChild(idx, tagNode)
                            List<TagNode> titleNodes = parent.getElementListByName("p", false)
                            parent.removeFromTree()
                            tagNode.removeAttribute("class")
                            tagNode.insertChild(0, new ContentNode("[" + getAdmotionMapping(adMotionLevel) + "]"))
                            if (!titleNodes.isEmpty()) {
                                // Inserted at index 0 as well, so the title ends up before the [LEVEL] marker.
                                tagNode.insertChild(0, titleNodes.get(0))
                            }
                        } else if ("panel-content".equals(className)) {
                            // Surround panel content with "****" lines.
                            tagNode.removeAttribute("class")
                            tagNode.insertChild(0, new ContentNode("****"))
                            tagNode.addChild(new ContentNode("****"))
                            print("merge careful, new panel format for file: ")
                        } else if (className.contains("syntaxhighlighter")) {
                            if (!"confbox programlisting".equals(parent.getAttributeByName("class"))) {
                                println "something is really wrong"
                                throw new RuntimeException("bruh");
                            }
                            // Replace the highlighter wrapper with a <pre> carrying the guessed language.
                            List<BaseToken> children = tagNode.getAllChildren()
                            int index = parent.getParent().getChildIndex(tagNode.getParent())
                            TagNode pre = new TagNode("pre")
                            pre.addChildren(children)
                            parent.getParent().insertChild(index, pre)
                            pre.addAttribute("class", guessLanguage(pre))
                            tagNode.getParent().removeFromTree()
                        }
                    }
                }
            }
            // Keep traversing.
            true
        } as TagNodeVisitor
        result.traverse(visitor)
        // java.nio.file.Path has toAbsolutePath(), not an absolutePath property.
        serializer.writeToFile(
                result, tmpHtml.toAbsolutePath().toString(), "utf-8"
        )
        // Strip the 5-character ".html" suffix.
        // NOTE(review): as before, the .adoc is written directly into the output root, not into the
        // mirrored sub directory - confirm that flattening is intended.
        String htmlName = target.fileName.toString()
        String baseName = htmlName.substring(0, htmlName.length() - 5)
        // resolve() uses the platform separator instead of a hard-coded backslash.
        def targetPath = dst.toAbsolutePath().resolve(baseName + '.adoc').toString()
        println targetPath
        // List form passes each argument as-is, so paths containing spaces survive.
        // -R was previously passed twice; once is enough.
        def pandoc = ['pandoc', '-R', '-f', 'html', '--atx-headers', '-t', 'asciidoc', '-s',
                      tmpHtml.toString(), '-o', targetPath].execute()
        // Drain stdout/stderr while waiting so pandoc cannot block on a full pipe.
        pandoc.waitForProcessOutput(System.out, System.err)
        if (pandoc.exitValue() != 0) {
            println "pandoc exited with code ${pandoc.exitValue()} for $relative"
        }
    } else {
        // Copy other resources (images, attachments...) once. The check has to look at the
        // destination: the source file always exists, so testing it never copied anything.
        if (Files.notExists(target)) {
            Files.copy(f.toPath(), target)
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment