-
-
Save ANierbeck/0c062faef5b55277a15a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.4') | |
import org.htmlcleaner.* | |
// Converts every *.html file under ./html into an AsciiDoc file under
// ./asciidoc (same relative path, with ".adoc" appended to the file name).
// Each page is first cleaned with HtmlCleaner, written to a temporary file,
// and then handed to the external `pandoc` executable for the conversion.
def src = new File('html').toPath()
def dst = new File('asciidoc').toPath()

// eachFileRecurse does not invoke the closure for the root directory itself,
// so the output root must be created here; without it every directory
// creation and every pandoc write below would fail on a fresh run.
dst.toFile().mkdirs()

def cleaner = new HtmlCleaner()
def props = cleaner.properties
props.translateSpecialEntities = false
def serializer = new SimpleHtmlSerializer(props)

src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // mkdirs rather than mkdir: a missing parent must not silently drop
        // the whole subtree.
        target.toFile().mkdirs()
    } else if (f.name.endsWith('.html')) {
        def tmpHtml = File.createTempFile('clean', '.html')
        try {
            println "Converting $relative"
            def result = cleaner.clean(f)
            // Strip `class` attributes and flatten table cells to plain text
            // (th is renamed to td). The null-safe navigation guards against
            // a null first argument.
            // NOTE(review): per the TagNodeVisitor signature the first
            // argument is presumably the parent of the visited node - confirm.
            result.traverse({ tagNode, htmlNode ->
                tagNode?.attributes?.remove 'class'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }
                true // keep traversing
            } as TagNodeVisitor)
            serializer.writeToFile(
                result, tmpHtml.absolutePath, "utf-8"
            )

            // Argument list instead of a single command string: the string
            // form is split on whitespace, which breaks on paths with spaces.
            // NOTE(review): -R, -S and --normalize look like pandoc 1.x
            // options - confirm they are accepted by the installed pandoc.
            def command = [
                'pandoc', '-f', 'html', '-t', 'asciidoc',
                '-R', '-S', '--normalize', '-s',
                tmpHtml.absolutePath, '-o', "${target}.adoc".toString()
            ]
            def pandoc = command.execute()
            // Drain stdout/stderr while waiting so a chatty pandoc cannot
            // block on a full pipe, and so its diagnostics are visible.
            pandoc.waitForProcessOutput(System.out, System.err)
            if (pandoc.exitValue() != 0) {
                System.err.println "pandoc failed for $relative (exit code ${pandoc.exitValue()})"
            }
        } finally {
            // Always remove the temp file, even when cleaning or pandoc fails.
            tmpHtml.delete()
        }
    }
    // Files that are not *.html are intentionally not copied to the output tree.
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment