Skip to content

Instantly share code, notes, and snippets.

@k16shikano
Last active September 17, 2020 11:02
Show Gist options
  • Save k16shikano/ca11df4aa1bcaf91770a00607c1801e4 to your computer and use it in GitHub Desktop.
Save k16shikano/ca11df4aa1bcaf91770a00607c1801e4 to your computer and use it in GitHub Desktop.
圧縮解除した /ToUnicode を雑に本質に書き換える
{-# LANGUAGE OverloadedStrings #-}
module Main where
import System.Environment (getArgs)
import Data.Char (chr)
import Numeric (showHex, readHex)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as B
import Text.Parsec hiding (many, (<|>))
import Control.Applicative
import Text.Parsec.ByteString
main = do
f:rest <- getArgs
t <- B.readFile f
B.putStrLn $ letItSnow t
letItSnow :: ByteString -> ByteString
letItSnow str =
case runParser
(B.concat <$> (manyTill $ choice
[ try header
, try bfchar
, try bfrange
, otherline
]) (eof))
() "" str of
Left err -> error $ "Can not parse PDF " ++ (show err)
Right s -> s
letItSnowman :: Int -> ByteString
letItSnowman h = B.concat ["<", h', ">", " <2603>"]
where h' | h < 16 = B.pack $ '0':(showHex h "")
| h < 256 = B.pack $ showHex h ""
| h < 4096 = B.pack $ '0':(showHex h "")
| otherwise = B.pack $ showHex h ""
hexInt = fst . head . readHex . B.unpack
hexletters :: Parser ByteString
hexletters = do
char '<'
ls <- choice
[ try $ manyTill (count 4 $ hexletter) (try $ char '>')
, (:[]) <$> (count 2 $ hexletter) <* char '>'
]
spaces
return $ B.pack $ concat ls
hexletter :: Parser Char
hexletter = oneOf "0123456789ABCDEFabcdef"
hexletterArray :: Parser ByteString
hexletterArray = do
char '['
spaces
lets <- manyTill hexletters (try $ spaces >> char ']')
spaces
return $ B.intercalate "\n" lets
header :: Parser ByteString
header = do
h <- string "begincmap"
s <- manyTill anyChar (try $ string "endcodespacerange")
spaces
return $ B.pack $ h++s++"\nendcodespacerange"
otherline :: Parser ByteString
otherline = B.pack <$> (++"\n")
<$> manyTill anyChar (try $ oneOf "\n")
bfchar :: Parser ByteString
bfchar = do
many1 digit
spaces
string "beginbfchar"
spaces
ms <- many (toSnowman <$> hexletters <* hexletters)
spaces
string "endbfchar"
spaces
return $ B.unlines $
[B.unwords [B.pack $ show $ length ms, "beginbfchar"]]
++ ms ++
["endbfchar"]
where
toSnowman c = letItSnowman $ hexInt c
bfrange :: Parser ByteString
bfrange = do
many1 digit
spaces
string "beginbfrange"
spaces
ms <- many1 (toSnowman
<$> hexletters
<*> (hexletters
<* (try hexletters <|> hexletterArray)))
spaces
string "endbfrange"
spaces
return $ B.concat $ -- unlines $
[B.unwords [B.pack $ show $ length ms, "beginbfchar\n"]]
++ ms ++
["endbfchar\n"]
where
toSnowman c c'
= B.unlines $ map letItSnowman [hexInt c .. hexInt c']
@k16shikano
Copy link
Author

$ pdftk any.pdf output temp.pdf uncompress
$ runhaskell toSnowman.hs temp.pdf > snowman.pdf
$ pdftk snowman.pdf output honshitsu.pdf compress

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment