Skip to content

Instantly share code, notes, and snippets.

@master-q
Created January 5, 2012 11:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save master-q/1564930 to your computer and use it in GitHub Desktop.
Save master-q/1564930 to your computer and use it in GitHub Desktop.
GetAllPagePan_Fin.hs
#!/usr/bin/env runhaskell
import qualified Text.Pandoc as P
import System.Process
import System.Exit
import Data.ByteString.Char8 ()
import Codec.Binary.UTF8.String
{--
http://groups.google.com/group/haskell-jp/browse_thread/thread/b15a016985d8c426
によるとghc 7.0.Xまでのsystem関数はlocale encodingが通らない。変換が必要。
--}
inlineToString :: [P.Inline] -> String
inlineToString inlines = foldr (++) "" $ fmap go inlines
where
go :: P.Inline -> String
go (P.Str s) = s
go P.Space = " "
go e = error "inlineToString can't understand: " ++ show e
findLink' :: [P.Inline] -> [(String, String)]
findLink' inlines = concat $ fmap go inlines
where
go :: P.Inline -> [(String, String)]
go (P.Link inline (url, _)) = [(url, inlineToString inline)]
go _ = []
findLink :: [P.Block] -> [(String, String)]
findLink blocks = concat $ fmap go blocks
where
go :: P.Block -> [(String, String)]
go (P.Plain inlines) = findLink' inlines
go (P.Para inlines) = findLink' inlines
go (P.BulletList blockss) = concat $ fmap findLink blockss
go _ = []
htmlToPandoc :: String -> P.Pandoc
htmlToPandoc = P.readHtml P.defaultParserState{ P.stateStandalone = True }
curlIt :: (String, String) -> IO ExitCode
curlIt ss = rawSystem' "curl" ["-d",
"p=" ++ postp,
"-d",
"c=e",
"http://www.sampou.org/cgi-bin/haskell.cgi",
"-o",
outfile]
where postp = tail $ dropWhile (/= '?') $ fst ss
outfile = (snd ss) ++ ".html"
rawSystem' cmd = rawSystem cmd . map encodeString
main :: IO ()
main = do
con <- getContents
let P.Pandoc _ blocks = htmlToPandoc con
mapM_ curlIt $ findLink blocks
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment