Created
October 20, 2015 11:09
-
-
Save fomkin/35049bc1db4505957d6b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.InputStream | |
import java.util.zip.ZipInputStream | |
import scala.io.Source | |
import scala.io.Codec | |
import scala.xml.XML | |
/**
 * Minimal reader for OpenDocument spreadsheet archives.
 *
 * It locates the `content.xml` entry inside the zip container and extracts the
 * text of every cell of every table found under `body / spreadsheet / table`.
 */
object SimpleOsdParser {

  /** Table name -> rows -> cell texts. */
  type Table = Map[String, Seq[Seq[String]]]

  /**
   * Parses a zipped spreadsheet document into a [[Table]].
   *
   * Each row keeps its cells in document order; only trailing empty cells are
   * dropped, so an empty cell between two filled ones is preserved and column
   * positions stay aligned. Trailing empty rows of each table are dropped too.
   * If two tables share a name, the later one wins (a consequence of `toMap`).
   *
   * The stream is read but not closed here; the caller remains responsible
   * for closing it.
   *
   * NOTE(review): repeat-count attributes on rows/cells (for example
   * `table:number-columns-repeated`) are not read, so repeated cells are not
   * expanded -- confirm whether callers need that.
   *
   * @param bytes raw bytes of the zip archive
   * @return `Right` with the parsed tables, or `Left` with a message when the
   *         archive has no `content.xml` entry
   * @throws Exception if `content.xml` starts with a byte-order mark other
   *                   than UTF-8
   */
  def parse(bytes: InputStream): Either[String, Table] =
    readContentXml(new ZipInputStream(bytes)) match {
      case Some(xmlString) =>
        val xml = XML.loadString(xmlString)
        val tables = xml \ "body" \ "spreadsheet" \ "table"
        val pairs = tables map { table =>
          val name = table \@ "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}name"
          val rows = table \ "table-row" map { row =>
            val cells = row \ "table-cell" map (_.text)
            // Trim trailing blanks only. The previous `reverse.filter(_.nonEmpty)`
            // returned the cells backwards and removed interior blanks as well,
            // which shifted columns.
            trimTrailing(cells)(_.isEmpty)
          }
          (name, trimTrailing(rows)(_.isEmpty))
        }
        Right(pairs.toMap)
      case None => Left("Invalid ODS file: content.xml not found")
    }

  /**
   * Advances through the archive until the `content.xml` entry is reached and
   * returns its text decoded as UTF-8, or `None` when the archive has no such
   * entry. A leading UTF-8 byte-order mark is accepted and skipped; any other
   * byte-order mark is rejected with an exception.
   */
  @scala.annotation.tailrec
  private def readContentXml(zip: ZipInputStream): Option[String] =
    Option(zip.getNextEntry) match {
      case Some(entry) if entry.getName == "content.xml" =>
        import UnicodeBOMInputStream.BOM
        val withBom = new UnicodeBOMInputStream(zip)
        withBom.getBOM match {
          case BOM.UTF_8 | BOM.NONE =>
            val stream = withBom.skipBOM()
            Some(Source.fromInputStream(stream)(Codec.UTF8).mkString)
          case _ => throw new Exception("Unsupported encoding")
        }
      case Some(_) => readContentXml(zip)
      case None => None
    }

  /** Drops the trailing run of elements satisfying `isBlank`, keeping the original order. */
  private def trimTrailing[A](xs: Seq[A])(isBlank: A => Boolean): Seq[A] =
    xs.reverse.dropWhile(isBlank).reverse
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment