Created
September 18, 2017 09:57
-
-
Save ctomc/83ea2528d7530f6790f009b3ebdd61b2 to your computer and use it in GitHub Desktop.
confluence to asciidoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* JBoss, Home of Professional Open Source | |
* Copyright 2017, Red Hat, Inc., and individual contributors as indicated | |
* by the @authors tag. | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.21') | |
import org.htmlcleaner.* | |
import java.nio.charset.StandardCharsets | |
import java.nio.file.Files | |
import java.nio.file.Paths | |
// Root of the HTML input tree and root of the AsciiDoc output tree,
// both resolved relative to the working directory.
def src = new File('html').toPath()
def dst = new File('asciidoc').toPath()

// One HtmlCleaner instance is shared by every page; its properties are tuned here.
HtmlCleaner cleaner = new HtmlCleaner()
def props = cleaner.properties
props.setTranslateSpecialEntities(false)
props.advancedXmlEscape = true
props.pruneTags = "script"

// Created but never registered: the registration further down is commented out.
TagTransformation divTransformer = new TagTransformation("div")

CleanerTransformations transformations = new CleanerTransformations()
//transformations.addTransformation(divTransformer)

// Source-only transformations for structural/inline wrappers.
// NOTE(review): presumably a TagTransformation without a target tag drops the
// tag itself while keeping its content - confirm against the HtmlCleaner docs.
for (String tagName : ["thead", "tbody", "tfoot", "header", "span", "code"]) {
    transformations.addTransformation(new TagTransformation(tagName))
}

/* Heading demotion is currently disabled:
transformations.addTransformation(new TagTransformation("h5", "h6",))
transformations.addTransformation(new TagTransformation("h4", "h5",))
transformations.addTransformation(new TagTransformation("h3", "h4",))
transformations.addTransformation(new TagTransformation("h2", "h3",))
transformations.addTransformation(new TagTransformation("h1", "h2",))*/

props.cleanerTransformations = transformations

// Serializer used to write the cleaned HTML back to disk before it is converted.
def serializer = new PrettyHtmlSerializer(props)
/**
 * Guesses the syntax-highlighting language of a code listing from its text.
 *
 * The text of {@code sourceTag} is tested against groups of substring markers.
 * The groups are evaluated in a fixed order and the first group that matches
 * decides the result:
 * <ol>
 *   <li>{@code "xml"}  - contains {@code xmlns}, or contains {@code <} and none
 *       of a few Java-only markers;</li>
 *   <li>{@code "java"} - contains any Java marker;</li>
 *   <li>{@code "ruby"} - contains any management-CLI style marker
 *       (NOTE(review): "ruby" looks like a stand-in highlighter for CLI
 *       output - confirm);</li>
 *   <li>{@code "bash"} - contains any shell marker;</li>
 *   <li>{@code ""}     - the whole text is a single {@code key=value} token,
 *       or it contains a log/URL/archive marker;</li>
 *   <li>{@code "sql"}  - contains {@code CREATE TABLE};</li>
 *   <li>{@code ""}     - nothing matched.</li>
 * </ol>
 *
 * @param sourceTag node whose text content is inspected
 * @return the language name, or an empty string when no language is assumed
 */
static String guessLanguage(TagNode sourceTag) {
    String source = sourceTag.getText()

    // True when the listing contains at least one of the given substrings.
    def containsAny = { List<String> markers -> markers.any { String marker -> source.contains(marker) } }

    // A '<' alone is not enough for XML: Java generics use it as well,
    // so these markers veto the XML guess.
    List<String> javaVetoForXml = ["void", "static ", ".class", "new ", "final "]
    if (source.contains("xmlns") || (source.contains("<") && !containsAny(javaVetoForXml))) {
        return "xml"
    }

    List<String> javaMarkers = [
            "import", ".java", "java.", "assert", "Assert.", "new ", "class", "interface",
            "package", "private ", "public ", "static ", "String ", "Boolean ", "boolean ",
            "ModelNode ", "newInstance", "try", "void ", "catch", "@Test", "System.out",
            "new ModelNode(", "throw", ".close();", ".log", "@EJB", "@Stateless", "log.",
            "bsh ", ".resolveModelAttribute", ".parseAndSetParameter", "KernelServices ",
            ".registerOperationHandler", ".registerSubsystemModel", ".addChildResource",
            "AttributeConverter ", ".addOperationTransformationOverride", "attributeBuilder",
            "RejectAttributeChecker", "PathAddress ", "@Arquillian", "DiscardAttributeChecker",
            "PathElement", "Transformation"
    ]
    if (containsAny(javaMarkers)) {
        return "java"
    }

    // The original list tested 'value-type' twice; one entry is enough.
    List<String> cliMarkers = [
            "outcome", ":write-attribute", ":add", ":undefine", ":read-", "standalone@",
            "domain@", '"operation"', '"children" =', '"address"', 'model-description',
            'reload', 'request-properties', 'reply-properties', 'value-type',
            '"attributes" =', '/subsystem=', '/deployment=', '"result" =', 'deploy ',
            '"rollout-plan" =', '"description" =', 'deployment-overlay '
    ]
    if (containsAny(cliMarkers)) {
        return "ruby"
    }

    // "@Test" used to be listed here too, but it can never reach this point
    // because the Java markers above already claim it.
    List<String> shellMarkers = [
            ".sh", ".bat", "cd ", "mvn ", "keytool", "grep", "cat", "##", " matches",
            "cp ", "ls ", "-D", "curl ", "sudo ", "vi ", "yum ", "git ", "rm "
    ]
    if (containsAny(shellMarkers)) {
        return "bash"
    }

    // Single key=value token. The previous pattern escaped the backslashes twice
    // and therefore required a literal '\' in the text, so it never matched.
    if (source ==~ /\w+=\S+/) {
        println(" found props")
        return ""
    }

    // Log output, URLs and archive names are deliberately left unhighlighted.
    // This check has to stay ahead of the SQL check to keep the original precedence.
    List<String> plainTextMarkers = [
            "MSC service thread", "[stdout] ", "; legacy version: ",
            "(DeploymentScanner-threads: ", "DeploymentScanner-threads ",
            "http://", ".jar", ".war"
    ]
    if (containsAny(plainTextMarkers)) {
        return ""
    }

    if (source.contains("CREATE TABLE")) {
        return "sql"
    }

    //println "we have to guess: " + source
    return ""
    //return "java"
}
/**
 * Maps a Confluence admonition type to the AsciiDoc admonition label.
 *
 * Known types are {@code note}, {@code tip}, {@code info} and {@code warning};
 * {@code info} is rendered as {@code IMPORTANT}.
 *
 * @param confluenceType admonition type as found in the Confluence markup
 * @return the upper-case AsciiDoc admonition label
 * @throws RuntimeException when the type is not one of the known values
 */
static String getAdmotionMapping(String confluenceType){
    switch (confluenceType) {
        case "note":
            return "NOTE"
        case "tip":
            return "TIP"
        case "info":
            return "IMPORTANT"
        case "warning":
            return "WARNING"
        default:
            throw new RuntimeException("Could not map '"+confluenceType+"'")
    }
}
/*
 * Walks every entry below src and mirrors it into dst:
 *  - directories are recreated under dst;
 *  - *.html files are pre-processed (syntax-highlighter markup flattened, DOM
 *    rewritten by the visitor below), written to clean/<relative path> and then
 *    converted by an external pandoc process to <dst>/<name>.adoc;
 *  - every other file is copied to the same relative location under dst unless
 *    it is already there.
 */
src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // createDirectories is a no-op for existing directories and also creates
        // missing parents (including the dst root itself).
        Files.createDirectories(target)
    } else if (f.name.endsWith('.html')) {
        def tmpHtml = Paths.get('clean', relative.toString()) //File.createTempFile('clean', 'html')
        // Make sure the folders that receive the cleaned HTML and the .adoc exist.
        Files.createDirectories(tmpHtml.toAbsolutePath().parent)
        Files.createDirectories(dst)
        println "Converting $relative"
        String html = new String(Files.readAllBytes(f.toPath()),StandardCharsets.UTF_8);
        // Unwrap the per-token <code>/<div> elements of the syntax highlighter so
        // that only the plain listing text is left.
        html = html
                .replaceAll("<code class=\\\"value\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"plain\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"comments\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"keyword\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"string\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"color1\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<code class=\\\"preprocessor\\\">(.*?)<\\/code>","\$1")
                .replaceAll("<div class=\\\"line\\\">(.*?)<\\/div>","\$1")
                .replaceAll("<code>(.*?)<\\/code>","\$1")
        def result = cleaner.clean(html)
        TagNodeVisitor visitor = { parent, htmlNode ->
            if (htmlNode instanceof TagNode) {
                TagNode tagNode = (TagNode)htmlNode;
                // NOTE(review): this strips the attributes only if getAttributes()
                // hands out the live map. The class checks below still read the
                // attribute through getAttributeByName - confirm the behaviour of
                // the HtmlCleaner version in use before changing these two lines.
                tagNode?.attributes?.remove 'class'
                tagNode?.attributes?.remove 'id'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    // Header cells become ordinary cells and every cell is
                    // reduced to its plain text.
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }
                if ('p' == tagNode?.name && 'title' == tagNode.attributes.id) {
                    tagNode.removeFromTree()
                }
                if ('ul' == tagNode?.name && 'docnav' == tagNode.getAttributeByName("class")) {
                    tagNode.removeFromTree()
                    print "ul processed"
                }
                if ('pre' == tagNode?.name) {
                    String className = tagNode.getAttributeByName("class")
                    if (className != null && className.contains("brush: java")) {
                        tagNode.removeAttribute("class")
                        tagNode.addAttribute("class", "java")
                    }
                }
                if ('div' == tagNode?.name) {
                    tagNode.removeAttribute("id")
                    String className = tagNode.getAttributeByName("class")
                    if (className != null) {
                        if (className.contains("admonition-body")) {
                            // The admonition level is the part of the parent's
                            // class after its last '-'.
                            String parentClass = parent.getAttributeByName("class")
                            String adMotionLevel = parentClass.substring(parentClass.lastIndexOf("-")+1)
                            // Hoist the body next to its wrapper, then drop the wrapper.
                            int idx = parent.getParent().getChildIndex(parent);
                            parent.getParent().insertChild(idx, tagNode);
                            List<TagNode> titleNodes = parent.getElementListByName("p",false);
                            parent.removeFromTree();
                            tagNode.removeAttribute("class")
                            tagNode.insertChild(0, new ContentNode("["+getAdmotionMapping(adMotionLevel)+"]"))
                            if (!titleNodes.isEmpty()) {
                                tagNode.insertChild(0, titleNodes.get(0))
                                //parent.getParent().insertChildBefore(tagNode, titleNodes.get(0))
                            }
                        } else if ("panel-content".equals(className)) {
                            // Wrap the panel content in AsciiDoc sidebar delimiters.
                            tagNode.removeAttribute("class")
                            tagNode.insertChild(0, new ContentNode("****"))
                            tagNode.addChild(new ContentNode("****"))
                            print("merge careful, new panel format for file: ")
                        } else if (className.contains("syntaxhighlighter")) {
                            if (!"confbox programlisting".equals(parent.getAttributeByName("class"))){
                                println "something is really wrong"
                                throw new RuntimeException("bruh");
                            }
                            // Replace the whole listing wrapper with a <pre> whose
                            // class carries the guessed language.
                            List<BaseToken> children = tagNode.getAllChildren();
                            int index = parent.getParent().getChildIndex(tagNode.getParent());
                            TagNode pre = new TagNode("pre")
                            pre.addChildren(children);
                            parent.getParent().insertChild(index, pre)
                            pre.addAttribute("class", guessLanguage(pre))
                            tagNode.getParent().removeFromTree();
                        }
                    }
                }
            }
            // Returning true keeps the traversal going.
            true
        } as TagNodeVisitor
        result.traverse(visitor)
        //result.traverse(divCleaner)
        // Path has no public 'absolutePath' property; toAbsolutePath() is the
        // portable equivalent.
        serializer.writeToFile(
                result, tmpHtml.toAbsolutePath().toString(), "utf-8"
        )
        //println "pandoc -f html -t asciidoctorj -R -S --normalize -s $tmpHtml -o ${target}.adoc"
        //"pandoc -f html -t asciidoc -R -S --normalize -s $tmpHtml -o ${target.getFileName()}.adoc".execute().waitFor()
        // The .adoc is written directly into the dst root (as before), named after
        // the HTML file without its ".html" suffix. resolve() supplies the
        // platform's separator instead of a hard-coded backslash.
        String htmlName = target.fileName.toString()
        String baseName = htmlName.substring(0, htmlName.length() - 5)
        String targetPath = dst.toAbsolutePath().resolve(baseName + ".adoc").toString()
        println targetPath
        //println "pandoc --section-divs -f html -t asciidoc -R -S --normalize -s $tmpHtml -o ${targetPath}"
        // Argument list instead of one command string, so paths containing blanks
        // stay a single argument.
        def pandoc = ['pandoc', '-R', '-f', 'html', '--atx-headers', '-t', 'asciidoc', '-s',
                      tmpHtml.toString(), '-o', targetPath].execute()
        // Drain stdout/stderr so pandoc cannot block on a full pipe, then wait.
        pandoc.waitForProcessOutput(System.out, System.err)
        if (pandoc.exitValue() != 0) {
            // Best effort: report the failure and carry on with the next file.
            System.err.println "pandoc failed with exit code ${pandoc.exitValue()} for $relative"
        }
        //tmpHtml.delete()
    } else {
        // Copy any other file once. The check has to look at the destination:
        // the source file always exists while it is being iterated, so testing
        // it meant nothing was ever copied.
        if (Files.notExists(target)) {
            Files.createDirectories(target.toAbsolutePath().parent)
            Files.copy(f.toPath(), target)
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment