Skip to content

Instantly share code, notes, and snippets.

@Sciss
Created June 13, 2022 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sciss/83b39371feacffe779eb726e7d064c79 to your computer and use it in GitHub Desktop.
Save Sciss/83b39371feacffe779eb726e7d064c79 to your computer and use it in GitHub Desktop.
// Input directory: `Downloads/bla` below the user's home directory.
val dirIn = userHome / "Downloads" / "bla"
// The `.html` children of `dirIn`, sorted using `File.NameOrdering`.
val pIn = {
  val htmlFiles = dirIn.children(f => f.name.endsWith(".html"))
  htmlFiles.sorted(File.NameOrdering)
}
/** Returns the image files referenced by the left and right page markers of an HTML file.
  *
  * The file is decoded as UTF-8. For each of the sides `L` and `R` (in that order), the first
  * occurrence of `data-side="<side>"` is located, and the value of the next `src="..."`
  * attribute after it is resolved against the directory containing `htmlF`.
  *
  * A side without a marker is skipped silently. A side whose marker is not followed by a
  * complete `src="..."` attribute is skipped with a warning. A referenced file that does not
  * exist is still returned, but a warning is printed.
  *
  * @param htmlF the HTML file to scan
  * @return the referenced files, left side first; at most two elements
  */
def getPageFiles(htmlF: File): Seq[File] = {
  val dir = htmlF.parent
  // `readAllBytes` reads the complete file and always closes it, unlike sizing a buffer from
  // `InputStream.available` (only an estimate) and issuing a single unchecked `read`.
  val html = new String(
    java.nio.file.Files.readAllBytes(htmlF.toPath),
    java.nio.charset.StandardCharsets.UTF_8
  )
  val srcKey = "src=\""

  // Looks up the image file for one page side, if the HTML declares one.
  def getPage(c: Char): Option[File] = {
    val iL = html.indexOf(s"""data-side="$c"""")
    if (iL < 0) None // `indexOf` yields -1 when the marker is absent
    else {
      val jL = html.indexOf(srcKey, iL)
      val kL = if (jL < 0) -1 else html.indexOf('"', jL + srcKey.length)
      if (kL < 0) {
        // No opening `src="` or no closing quote: there is no usable file name to extract.
        println(s"! Warning. No image source found for side $c in ${htmlF.getName}")
        None
      } else {
        val name = html.substring(jL + srcKey.length, kL)
        val f = dir / name
        if (!f.isFile) println(s"! Warning. File $name does not exist")
        Some(f)
      }
    }
  }

  Seq('L', 'R').flatMap(c => getPage(c).toList)
}
// Image files of all HTML pages, concatenated in the order of `pIn`.
val imagesIn = pIn.flatMap(htmlF => getPageFiles(htmlF))
// Number of image files collected.
imagesIn.size
// BookReaderImages.jpeg
// BookReaderImages_002.jpeg
// BookReaderPreview_002.jpeg
// BookReaderPreview.jpeg
// val dirImgOut = dirIn / "pages"
// dirImgOut.mkdir()
// imagesIn.zipWithIndex.foreach { case (f, fi) =>
// import sys.process._
// val ext = f.extL
// val fOut = dirImgOut / s"page-${fi + 1}.$ext"
// Seq("cp", f.path, fOut.path).!
// }
// ImageMagick `convert` invocation: all collected images, in order, with `-density 150`,
// written to `output.pdf` inside `dirIn`.
val cmd: Seq[String] = {
  val pdfPath = (dirIn / "output.pdf").path
  (Seq("convert", "-density", "150") ++ imagesIn.map(_.path)) :+ pdfPath
}
// NOTE: if ImageMagick runs out of memory,
// then edit /etc/ImageMagick-6/policy.xml
// and change <policy domain="resource" name="disk" value="1GiB"/> to <policy domain="resource" name="disk" value="8GiB"/>

// Run the command and yield its exit code. `Process(cmd)` is used instead of a bare
// `{ import sys.process._; cmd.! }` block, because a `{` directly after the preceding
// expression (with no blank line) is parsed as a block argument to that expression.
sys.process.Process(cmd).!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment