Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
How to remove empty text from HTML with Jsoup + Scala
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import scala.collection.JavaConverters._
/**
 * Removes every element below the document's `<body>` that carries no text.
 *
 * An element is detached when jsoup's `hasText` reports false for it, i.e. neither it nor
 * any of its descendants contains non-blank text (for example `<i></i>` or `<p></p>`).
 * The document is modified in place.
 *
 * @param doc the parsed jsoup document to clean up; mutated by this call
 * @return the same `doc` instance, after the text-less elements have been removed
 */
def removeAllEmptyChildren(doc: Document): Document = {
  val body = doc.body()
  // getAllElements() yields `body` itself as well as its descendants; skip it so a
  // document without any text keeps its <body> instead of having it detached.
  for {
    element <- body.getAllElements().asScala
    if (element ne body) && !element.hasText
  } element.remove()
  doc
}
/*
scala> Jsoup.parse("<p>hi there</p><p><b>Bold text</b><i></i><p></p> yay stuff and things</p>")
res0: org.jsoup.nodes.Document =
<html>
<head></head>
<body>
<p>hi there</p>
<p><b>Bold text</b><i></i></p>
<p></p> yay stuff and things
<p></p>
</body>
</html>
scala> removeAllEmptyChildren(res0)
res1: org.jsoup.nodes.Document =
<html>
<head></head>
<body>
<p>hi there</p>
<p><b>Bold text</b></p> yay stuff and things
</body>
</html>
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment