Skip to content

Instantly share code, notes, and snippets.

@idrisr
Created October 2, 2023 21:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save idrisr/700d4263f876360b862ce404ee1455b5 to your computer and use it in GitHub Desktop.
Save idrisr/700d4263f876360b862ce404ee1455b5 to your computer and use it in GitHub Desktop.
module Main where
-- https://github.com/Yuras/pdf-toolbox/issues/62
import System.Environment
import System.Exit (exitFailure)
import System.IO (hPutStrLn, stderr)

import qualified Data.Text as T
import qualified Pdf.Core as PC
import Pdf.Document
-- | Open the PDF file at the given path, resolve its document, catalog and
-- root page node in turn, run 'extract' on that node, and 'print' the
-- resulting 'T.Text'.
printRect :: String -> IO ()
printRect path =
  withPdfFile path $ \pdf ->
    document pdf
      >>= documentCatalog
      >>= catalogPageNode
      >>= extract pdf
      >>= print
-- | Walk the page tree below the given node and build one 'T.Text' from it.
--
-- Each kid reference of the node is loaded with 'loadPageNode'. A leaf
-- contributes the 'show' rendering of its 'pageMediaBox' result; an inner
-- node contributes the result of recursing into it. The pieces are joined
-- with 'T.concat', in kid order and without any separator.
extract :: Pdf -> PageNode -> IO T.Text
extract pdf node = do
  kidRefs <- pageNodeKids node
  T.concat <$> traverse describeKid kidRefs
  where
    -- Load a single kid reference and render it according to its kind.
    describeKid :: PC.Ref -> IO T.Text
    describeKid ref = do
      tree <- loadPageNode pdf ref
      case tree of
        PageTreeLeaf page -> T.pack . show <$> pageMediaBox page
        PageTreeNode child -> extract pdf child
-- | Program entry point.
--
-- Expects exactly one command-line argument, the path of the PDF file, and
-- passes it to 'printRect'. With any other argument count, a usage line
-- naming the actual executable (via 'getProgName') is written to stderr and
-- the process exits with a failure status, so misuse is visible to shells
-- and scripts instead of looking like a successful run.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [filePath] -> printRect filePath
    _ -> do
      progName <- getProgName
      hPutStrLn stderr ("Usage: " ++ progName ++ " <pdf-file>")
      exitFailure
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment