Skip to content

Instantly share code, notes, and snippets.

@silverweed
Last active April 11, 2018 10:15
Show Gist options
  • Save silverweed/a1da3e88823a64316083 to your computer and use it in GitHub Desktop.
Save silverweed/a1da3e88823a64316083 to your computer and use it in GitHub Desktop.
maudmap-hs
{-# LANGUAGE OverloadedStrings #-}
-- Emit sitemap for maud (requires access to maud db)
import Database.MongoDB
import System.Environment
import Data.Time.Clock.POSIX
import qualified Data.Time.Format as DTF
import Data.List (intercalate)
import Data.List.Split
domain :: String
domain = "https://crunchy.rocks"
dbName :: Database
dbName = "maud"
parseArgs :: [String] -> IO String
parseArgs [] = return "127.0.0.1"
parseArgs (x:_) = return x
emitUrl :: Document -> [String]
emitUrl [] = []
emitUrl (f:fs) | l == "shorturl" = (wrap "loc" $ domain ++ "/thread/" ++ v) : emitUrl fs
| l == "lrdate" || l == "lastupdate" = (wrap "lastmod" (todate $ read $ show $ value f)) : emitUrl fs
| l == "name" = (wrap "loc" $ domain ++ "/tag/" ++ (escape v)) : emitUrl fs
| otherwise = []:emitUrl fs
where
l = label f
v = init $ tail $ show $ value f
todate :: Int -> String
todate = DTF.formatTime DTF.defaultTimeLocale "%Y-%m-%dT%H:%M:%S" . posixSecondsToUTCTime . realToFrac
escape :: String -> String
escape [] = []
escape (c:cs) | c == '/' = "/" ++ escape cs
| otherwise = c : escape cs
emitXML :: Pipe -> Collection -> IO ()
emitXML pipe coll = do
let run = access pipe master dbName
docs <- run $ find (select [] coll) {
project = [ "shorturl" =: True
, "lrdate" =: True
, "lastupdate" =: True
, "name" =: True
, "_id" =: False
]} >>= rest
putStrLn $ intercalate "\n" $ map (wrapUrl coll) $ map emitUrl docs
wrapUrl :: Collection -> [String] -> String
wrapUrl _ [] = []
wrapUrl coll lst = i 2 $ wrap "url" $ "\n" ++ intercalate "\n"
(map (i 1) (lst
++ [wrap "changefreq" (cf coll)]
++ [wrap "priority" (prio coll)]
)) ++ "\n"
where
prio "home" = "1.0"
prio "stiki" = "0.8"
prio "new" = "0.75"
prio "threads" = "0.6"
prio "tags" = "0.5"
prio _ = "0"
cf "threads" = "daily"
cf "tags" = "weekly"
cf "stiki" = "monthly"
cf "new" = "yearly"
cf "home" = "yearly"
cf _ = "never"
emitHeader :: IO ()
emitHeader = do putStrLn "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
putStrLn "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"
emitFooter :: IO ()
emitFooter = putStrLn "</urlset>"
emitStatic :: String -> [String] -> IO ()
emitStatic "home" _ = putStrLn $ wrapUrl "home" [ wrap "loc" (domain ++ "/")
, wrap "lastmod" "2015-01-01T00:00:00" ]
emitStatic "new" _ = putStrLn $ wrapUrl "new" [ wrap "loc" (domain ++ "/new")
, wrap "lastmod" "2015-01-01T00:00:00" ]
emitStatic "stiki" [] = return ()
emitStatic "stiki" (page:pages) = do putStrLn $ wrapUrl "stiki" [ wrap "loc" $ domain ++ "/stiki/" ++ page
, wrap "lastmod" "2016-12-05T18:14:00"
]
emitStatic "stiki" pages
emitStatic _ _ = return ()
i :: Int -> String -> String
i n s = intercalate "\n" [[' ' | _ <- [1..n*4]] ++ line | line <- splitOn "\n" s]
wrap :: String -> String -> String
wrap tag string = "<" ++ tag ++ ">" ++ string ++ "</" ++ tag ++ ">"
main :: IO ()
main = do
pipe <- getArgs >>= parseArgs
>>= \dbaddr -> connect $ host dbaddr
emitHeader
emitStatic "home" []
emitStatic "new" []
emitStatic "stiki" [ "cookie-policy", "formatting", "nsfw-policy", "dmca", "video-tag" ]
emitXML pipe "threads"
emitXML pipe "tags"
emitFooter
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment