melix/convert.groovy

## convert.groovy
@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.4')
import org.htmlcleaner.*

// Converts every *.html file under ./html into an AsciiDoc file under
// ./asciidoc, mirroring the directory layout. Each page is first cleaned with
// HtmlCleaner and then handed to the external `pandoc` executable.
def src = new File('html').toPath()
def dst = new File('asciidoc').toPath()

// Make sure the destination root exists before anything is written into it;
// nothing else in this script creates it.
dst.toFile().mkdirs()

def cleaner = new HtmlCleaner()
def props = cleaner.properties
props.translateSpecialEntities = false
def serializer = new SimpleHtmlSerializer(props)

src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // mkdirs() (rather than mkdir()) also creates any missing parents.
        target.toFile().mkdirs()
    } else if (f.name.endsWith('.html')) {
        // Intermediate file holding the cleaned HTML that pandoc will read.
        def tmpHtml = File.createTempFile('clean', '.html')
        try {
            println "Converting $relative"
            def result = cleaner.clean(f)
            // Strip `class` attributes and flatten table cells to plain text.
            // NOTE(review): HtmlCleaner's TagNodeVisitor passes the *parent* of
            // the visited node as the first argument (null for the root, hence
            // the `?.`) - confirm that rewriting that node is what is intended.
            result.traverse({ tagNode, htmlNode ->
                tagNode?.attributes?.remove 'class'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    // Header cells are renamed to ordinary cells, and the cell
                    // content is replaced by its text only.
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }

                // Returning true keeps the traversal going.
                true
            } as TagNodeVisitor)
            serializer.writeToFile(
                    result, tmpHtml.absolutePath, "utf-8"
            )

            // Pass the command as a list so that paths containing spaces are
            // not split into several arguments. The output name keeps the
            // original file name and appends `.adoc` to it.
            def pandoc = [
                    'pandoc', '-f', 'html', '-t', 'asciidoc',
                    '-R', '-S', '--normalize', '-s',
                    tmpHtml.absolutePath,
                    '-o', "${target}.adoc".toString()
            ].execute()
            // Drain stdout/stderr while waiting; otherwise a chatty pandoc run
            // can block forever on a full pipe buffer.
            pandoc.waitForProcessOutput(System.out, System.err)
            if (pandoc.exitValue() != 0) {
                // Report and carry on with the remaining files (best effort).
                System.err.println "pandoc exited with code ${pandoc.exitValue()} for $relative"
            }
        } finally {
            // Always remove the temp file, even when cleaning or pandoc fails.
            tmpHtml.delete()
        }
    }/* else {
        "cp html/$relative $target".execute()
    }*/
}
	@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.4')
	import org.htmlcleaner.*

	// Converts every *.html file under ./html into an AsciiDoc file under
	// ./asciidoc, mirroring the directory layout. Each page is first cleaned with
	// HtmlCleaner and then handed to the external `pandoc` executable.
	def src = new File('html').toPath()
	def dst = new File('asciidoc').toPath()

	// Make sure the destination root exists before anything is written into it;
	// nothing else in this script creates it.
	dst.toFile().mkdirs()

	def cleaner = new HtmlCleaner()
	def props = cleaner.properties
	props.translateSpecialEntities = false
	def serializer = new SimpleHtmlSerializer(props)

	src.toFile().eachFileRecurse { f ->
	    def relative = src.relativize(f.toPath())
	    def target = dst.resolve(relative)
	    if (f.isDirectory()) {
	        // mkdirs() (rather than mkdir()) also creates any missing parents.
	        target.toFile().mkdirs()
	    } else if (f.name.endsWith('.html')) {
	        // Intermediate file holding the cleaned HTML that pandoc will read.
	        def tmpHtml = File.createTempFile('clean', '.html')
	        try {
	            println "Converting $relative"
	            def result = cleaner.clean(f)
	            // Strip `class` attributes and flatten table cells to plain text.
	            // NOTE(review): HtmlCleaner's TagNodeVisitor passes the *parent* of
	            // the visited node as the first argument (null for the root, hence
	            // the `?.`) - confirm that rewriting that node is what is intended.
	            result.traverse({ tagNode, htmlNode ->
	                tagNode?.attributes?.remove 'class'
	                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
	                    // Header cells are renamed to ordinary cells, and the cell
	                    // content is replaced by its text only.
	                    tagNode.name = 'td'
	                    String txt = tagNode.text
	                    tagNode.removeAllChildren()
	                    tagNode.insertChild(0, new ContentNode(txt))
	                }

	                // Returning true keeps the traversal going.
	                true
	            } as TagNodeVisitor)
	            serializer.writeToFile(
	                    result, tmpHtml.absolutePath, "utf-8"
	            )

	            // Pass the command as a list so that paths containing spaces are
	            // not split into several arguments. The output name keeps the
	            // original file name and appends `.adoc` to it.
	            def pandoc = [
	                    'pandoc', '-f', 'html', '-t', 'asciidoc',
	                    '-R', '-S', '--normalize', '-s',
	                    tmpHtml.absolutePath,
	                    '-o', "${target}.adoc".toString()
	            ].execute()
	            // Drain stdout/stderr while waiting; otherwise a chatty pandoc run
	            // can block forever on a full pipe buffer.
	            pandoc.waitForProcessOutput(System.out, System.err)
	            if (pandoc.exitValue() != 0) {
	                // Report and carry on with the remaining files (best effort).
	                System.err.println "pandoc exited with code ${pandoc.exitValue()} for $relative"
	            }
	        } finally {
	            // Always remove the temp file, even when cleaning or pandoc fails.
	            tmpHtml.delete()
	        }
	    }/* else {
	        "cp html/$relative $target".execute()
	    }*/
	}