Last active
December 15, 2017 11:09
-
-
Save chris-martin/4b0643749aa71054be87607dc3cb4bb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedLists #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
module Wrap where | |
import Control.Arrow ((>>>)) | |
import Data.Bool (not, (||)) | |
import Data.Char (isSpace) | |
import Data.Function ((&)) | |
import Data.Functor (fmap, ($>)) | |
import Data.List (foldr) | |
import Data.List.NonEmpty (NonEmpty (..), nonEmpty, toList) | |
import Data.Maybe (Maybe (..), fromMaybe) | |
import Data.Ord ((<=)) | |
import Data.Text.Lazy (Text, append, empty, groupBy, intercalate, | |
length, splitAt, splitOn) | |
import Numeric.Natural (Natural) | |
import Prelude (fromIntegral, undefined) | |
-- https://thecleancoder.blogspot.ca/2010/10/craftsman-62-dark-path.html | |
-- Write down the type of the thing you're trying to define. | |
-- `wrap column input` produces `input` with line breaks inserted so that no | |
-- line is longer than `column`, preferring to break lines at word boundaries. | |
wrap :: Natural -> Text -> Text | |
-- Consider whether there is anything we can do to break the problem into | |
-- smaller pieces that can be solved separately. I think there's one candidate | |
-- that seems good here: Let's try breaking the input into its component lines, | |
-- wrapping each line, and then putting the result back together. The way that | |
-- one line is wrapped shouldn't affect the way another line is wrapped, so the | |
-- problem cleanly reduces to wrapping a single line at a time, which seems | |
-- easier to think about. | |
wrap column = splitOn "\n" >>> fmap (wrapOneLine column) >>> intercalate "\n" | |
-- Same as `wrap`, but the `input` argument may not contain any line breaks. | |
wrapOneLine :: Natural -> Text -> Text | |
-- Is there another way to further subdivide the input? We should probably try | |
-- splitting it on word boundaries, since that's the only aspect of the string's | |
-- structure that we care about. | |
-- Splits a string on word boundaries. | |
tokenize :: Text -> [Text] | |
-- A place where we're comfortable inserting a line break is a place that is | |
-- either before or after a space (or both; breaking between two spaces seems | |
-- acceptable). | |
tokenize = groupBy (\x y -> not (isSpace x || isSpace y)) | |
-- It isn't entirely obvious to me that what we just wrote is correct, so let's | |
-- test it: | |
-- | >>> tokenize "abc def gh" | |
-- ["abc"," ","def"," "," ","gh"] | |
-- Looks good. Notice how each space character is its own separate token, even | |
-- where there were two spaces in a row. | |
-- Now I think we're going to need a function that packs as many tokens as | |
-- possible into one line without breaking any. | |
type Packing = (Text, [Text]) | |
-- `pack column tokens` returns the concatenation of as many tokens as possible | |
-- without exceeding `column` characters, and a list of the remaining unconsumed | |
-- tokens. If it is not possible to pack *any* tokens, we will return Nothing. | |
pack :: Natural -> NonEmpty Text -> Maybe Packing | |
-- To implement this, let's start by writing a function that enumerates *all* of | |
-- the ways to pack tokens, regardless of maximum column length. | |
packings :: NonEmpty Text -> [Packing] | |
packings tokens = f (empty, toList tokens) | |
where | |
f :: Packing -> [Packing] | |
f (_, []) = [] | |
f (packed, nextToken : remainingTokens) = packing : f packing | |
where packing = (append packed nextToken, remainingTokens) | |
-- Again this is probably something that we need to test to make sure what we | |
-- just wrote makes sense. | |
-- | >>> packings ["a", "b"] | |
-- [("a",["b"]),("ab",[])] | |
-- Now let's implement `pack` by selecting, among the possible packings, the | |
-- longest one that fits within the column length. | |
pack column = packings >>> foldr f Nothing | |
where | |
f :: Packing -> Maybe Packing -> Maybe Packing | |
f x@(packed, _) Nothing | fromIntegral (length packed) <= column = Just x | |
f _ y = y | |
-- This one calls for a whole battery of tests, to cover all of the cases. | |
-- | All of the tokens fit onto the line | |
-- >>> pack 3 ["a", "bb"] | |
-- Just ("abb",[]) | |
-- | Not all of the tokens fit onto the line | |
-- >>> pack 4 ["a", "bb", "cc"] | |
-- Just ("abb",["cc"]) | |
-- | No tokens fit onto the line | |
-- >>> pack 2 ["abc"] | |
-- Nothing | |
-- In the last case, we're allowed to resort to breaking a token. So let's write | |
-- a modified version of the `pack` function that breaks a token if necessary, | |
-- thus eliminating `Maybe` from the codomain. | |
pack' :: Natural -> NonEmpty Text -> Packing | |
pack' column tokens@(firstToken :| remainingTokens) = | |
pack column tokens & fromMaybe break | |
where | |
break = splitAt (fromIntegral column) firstToken & fmap (: remainingTokens) | |
-- | >>> pack' 2 ["abc", "d"] | |
-- ("ab",["c","d"]) | |
-- Now that we know how to pack a single line full of as many tokens as it can | |
-- hold, we can do that recursively to fill a bunch of lines. | |
wrapTokens :: Natural -> [Text] -> [Text] | |
wrapTokens column tokens = | |
case nonEmpty tokens of | |
Nothing -> [] | |
Just tokens' -> | |
let (line, moreTokens) = pack' column tokens' | |
in line : wrapTokens column moreTokens | |
-- Let's just reuse the same test cases we used for `pack`. | |
-- | >>> wrapTokens 3 ["a", "bb"] | |
-- ["abb"] | |
-- | >>> wrapTokens 4 ["a", "bb", "cc"] | |
-- ["abb","cc"] | |
-- | >>> wrapTokens 2 ["abc"] | |
-- ["ab","c"] | |
-- Now we can finally get back to implementing `wrapOneLine`. | |
wrapOneLine column = tokenize >>> wrapTokens column >>> intercalate "\n" | |
-- So now the `wrap` function ought to work. | |
-- | >>> wrap 3 "one two three" | |
-- "one\n \ntwo\n \nthr\nee" | |
-- | >>> wrap 4 "one two three" | |
-- "one \ntwo \nthre\ne" | |
-- | >>> wrap 5 "one two three" | |
-- "one \ntwo \nthree" | |
-- | >>> wrap 5 "one two three" | |
-- "one \n two \nthree" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment