Skip to content

Instantly share code, notes, and snippets.

@unhammer
Created December 14, 2020 08:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unhammer/830f780ee873fcf6b3842e47f6b75d64 to your computer and use it in GitHub Desktop.
Save unhammer/830f780ee873fcf6b3842e47f6b75d64 to your computer and use it in GitHub Desktop.
Data.Algorithm.Diff.Words extension to Diff package
-----------------------------------------------------------------------------
-- |
-- Module : Data.Algorithm.Diff.Words
-- Copyright : (c) Kevin Brubeck Unhammer 2020
-- License : BSD 3 Clause
-- Maintainer : s.clover@gmail.com
-- Stability : experimental
-- Portability : portable
--
-- Convenience functions for "word-diffing" / grouping diffs by separators.
-----------------------------------------------------------------------------
module Data.Algorithm.Diff.Words
( groupDiffBySep
, groupDiffBy
)
where
import Data.Algorithm.Diff (Diff, PolyDiff (..), getDiff)
-- | Regroup an element-wise diff into chunks that are broken only at
-- separator elements.
--
-- An element of the diff counts as a separator when it is a 'Both' whose
-- two sides are both equal to some member of @seps@; 'First' and 'Second'
-- elements never count as separators.
--
-- Similar to 'Data.Algorithm.Diff.getGroupedDiff', this is handy for
-- when you don't want to split words:
--
-- >>> groupDiffBySep " \n" (getDiff "abc def" "abc dxf")
-- [Both "abc" "abc",Both " " " ",First "def",Second "dxf"]
--
-- Implemented with 'groupDiffBy', using the separator test described
-- above as the predicate.
groupDiffBySep :: Eq sep => [sep] -> [Diff sep] -> [Diff [sep]]
groupDiffBySep seps = groupDiffBy isSeparator
  where
    -- Equivalent to asking whether the element equals @Both s s@ for
    -- some @s@ in @seps@.
    isSeparator (Both l r) = any (\s -> l == s && r == s) seps
    isSeparator _ = False
-- | Regroup an element-wise diff into chunks, breaking it only at the
-- elements for which the predicate holds.
--
-- The diff is first cut with 'split': every element matching the
-- predicate becomes a chunk of its own, and the runs of elements between
-- matches become the remaining chunks. Each chunk is then summarised:
--
-- * a chunk made up solely of 'Both' elements yields a single 'Both'
--   group;
--
-- * any other chunk yields a 'First' group (built from its 'First' and
--   'Both' elements) followed by a 'Second' group (built from its
--   'Second' and 'Both' elements).
--
-- The left-hand list of a group is built from the first component of
-- each 'Both' and the right-hand list from the second component, so
-- diffs whose two sides are not structurally identical are preserved.
--
-- Empty groups are never emitted: empty chunks (which 'split' produces
-- around leading, trailing or adjacent boundaries, and for an empty
-- diff) are dropped, and a chunk with nothing on one side yields only
-- the non-empty side.
--
-- Similar to 'Data.Algorithm.Diff.getGroupedDiffBy', this is handy for
-- when you only want the chunks split at certain points.
groupDiffBy :: (Diff c -> Bool) -> [Diff c] -> [Diff [c]]
groupDiffBy isBoundary = concatMap regroup . split isBoundary
  where
    -- Summarise one chunk as zero, one or two groups.
    regroup :: [Diff a] -> [Diff [a]]
    regroup chunk
      | all isBoth chunk = [Both lefts rights | not (null chunk)]
      | otherwise =
          [First lefts | not (null lefts)]
            ++ [Second rights | not (null rights)]
      where
        lefts = concatMap leftSide chunk
        rights = concatMap rightSide chunk

    -- What an element contributes to the left-hand (first) input.
    leftSide :: Diff a -> [a]
    leftSide (First a) = [a]
    leftSide (Both a _) = [a]
    leftSide (Second _) = []

    -- What an element contributes to the right-hand (second) input.
    rightSide :: Diff a -> [a]
    rightSide (Second b) = [b]
    rightSide (Both _ b) = [b]
    rightSide (First _) = []

    isBoth :: Diff a -> Bool
    isBoth (Both _ _) = True
    isBoth _ = False
-- | Like @Data.List.Extra.split@, but keep the separators.
--
-- Every element satisfying the predicate is returned as a singleton
-- chunk of its own; the (possibly empty) runs of other elements before,
-- between and after the separators are returned as chunks too, so the
-- result always starts and ends with a run and is never empty.
--
-- >>> split (`elem` "*#") "abc#def"
-- ["abc","#","def"]
-- >>> split (`elem` "*#") "*abc##ghi*"
-- ["","*","abc","#","","#","ghi","*",""]
-- >>> split (`elem` "*#") ""
-- [""]
--
-- The result is produced lazily: the first run can be consumed before
-- the next separator has been found.
split :: (a -> Bool) -> [a] -> [[a]]
split isSep xs = run : afterRun
  where
    -- Deliberately a lazy binding: scrutinising 'rest' before yielding
    -- 'run' would force a scan up to the next separator.
    (run, rest) = break isSep xs
    afterRun = case rest of
      [] -> []
      sep : more -> [sep] : split isSep more
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment