Last active
March 25, 2023 14:59
-
-
Save nsanmartin/43368d09c6a5cbd9770eda1c6c0cf3a2 to your computer and use it in GitHub Desktop.
Sort w3m bookmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Reads from stdin (one or more) html with w3m bookmars, | |
-- parse them and merges them joining sections with the same | |
-- name and removing duplicate bookmarks for a given section | |
-- (where two bookmars are the same if they have the same url | |
-- and text). | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE NoImplicitPrelude #-} | |
import Control.Monad ((>>), (>>=), mapM) | |
import Data.Attoparsec.Text | |
import Data.Char (isSpace) | |
import Data.List (sortBy, group, sort, foldr, foldr1, map, concat, head, (++)) | |
import Data.Monoid ((<>)) | |
import Data.Ord (comparing) | |
import Data.Text.Lazy (toStrict) | |
import Data.Text.Lazy.Builder | |
import Prelude (Eq, Ord, (==), Either(..), compare, ($), return, (/=), (<$>), (.)) | |
import qualified Data.Text as T | |
import qualified Data.Text.IO as TIO | |
data Html = Html [Section] | |
data Section = Section { name :: T.Text , items :: [Item] } | |
data Item = Item { url :: T.Text , text :: T.Text } deriving (Eq, Ord) | |
instance Eq Section where | |
(Section a _) == (Section b _) = a == b | |
instance Ord Section where | |
compare (Section a _) (Section b _) = compare a b | |
skipStrSpace s = asciiCI s >> skipSpace | |
parseItem = do | |
mapM skipStrSpace [ "<li>", "<a", "href=\""] | |
url <- takeWhile (/= '"') | |
skipStrSpace "\">" | |
text <- takeWhile (/= '<') | |
skipStrSpace "</a>" | |
return $ Item url text | |
parseSection = do | |
skipStrSpace "<h2>" | |
name <- takeWhile (/= '<') | |
mapM skipStrSpace [ "</h2>", "<ul>" ] | |
items <- many' parseItem | |
mapM skipStrSpace | |
[ "<!--End of section (do not delete this comment)-->" , "</ul>" ] | |
return $ Section name $ sort items | |
parseHtml = do | |
mapM skipStrSpace | |
[ "<html>" , "<head>" , "<title>Bookmarks</title>" , "</head>" | |
, "<body>" , "<h1>Bookmarks</h1>" | |
] | |
sections <- many' parseSection | |
mapM skipStrSpace [ "</body>", "</html>" ] | |
return $ Html $ sort sections | |
parseBookmarks = many1 parseHtml | |
buildItemText (Item url text) | |
= fromText "<li><a href=\"" | |
<> fromText url | |
<> fromText "\">" | |
<> fromText text | |
<> fromText "</a>\n" | |
buildSectionText (Section name items) | |
= fromText "<h2>" | |
<> fromText name | |
<> fromText "</h2>\n<ul>\n" | |
<> (foldr (<>) (fromLazyText "") $ map buildItemText items) | |
<> fromText "<!--End of section (do not delete this comment)-->\n</ul>\n" | |
buildHtmlText (Html sections) | |
= fromText "<html><head><title>Bookmarks</title></head>\n<body>\n<h1>Bookmarks</h1>\n" | |
<> (foldr (<>) (fromLazyText "") $ map buildSectionText sections) | |
<> fromText "</body>\n</html>\n" | |
getOutput contents = case parseOnly parseBookmarks contents of | |
(Left err) -> T.pack $ concat [ "Error parsing: ", err, "\n"] | |
(Right htmls) -> toStrict $ toLazyText $ buildHtmlText $ foldr1 joinBookmarks htmls | |
mergeSections :: Section -> Section -> Section | |
mergeSections (Section n ia) (Section _ ib) = | |
Section n $ map head $ Data.List.group $ sort $ ia ++ ib | |
joinBookmarks :: Html -> Html -> Html | |
joinBookmarks (Html sa) (Html sb) = | |
Html $ map (foldr1 mergeSections) $ group $ sort $ sa ++ sb | |
main = getOutput <$> TIO.getContents >>= TIO.putStr |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment