Skip to content

Instantly share code, notes, and snippets.

@runjak
Created March 2, 2014 14:21
Show Gist options
  • Save runjak/9307234 to your computer and use it in GitHub Desktop.
Save runjak/9307234 to your computer and use it in GitHub Desktop.
Simple, clutchy deduplication done with softlinks.
module Main where
{-
Given two checksum lists produced by
`find -type f -print0 | xargs -0 md5sum > foo.sums`,
this script finds the checksums that occur in both lists.
The `splitAt 32` is the length of an md5 sum in hex digits, and the `drop 4`
skips the four characters between the sum and the path (md5sum's
two-character separator followed by './'); adjust both in case another
sum should be used.
Deduplication is done by replacing the files from f1 with softlinks to the
files from f2. The script only prints the required `ln -sf` commands; it
does not run them itself.
-}
import Control.Arrow (second)
import Control.Monad
import Data.Map (Map)
import qualified Data.Map as Map
import qualified System.IO.UTF8 as UTF8
-- | Checksum listing whose files are to be replaced by softlinks.
f1 :: FilePath
f1 = "/tmp/1.sums"

-- | Checksum listing whose files serve as the softlink targets.
f2 :: FilePath
f2 = "/tmp/2.sums"

-- | Prefix prepended to every path read from 'f1'.
p1 :: FilePath
p1 = "prefix1/"

-- | Prefix prepended to every path read from 'f2'.
p2 :: FilePath
p2 = "prefix2/"
-- | Parse the contents of an @md5sum@ listing into a map from checksum to a
-- shell-quoted path.
--
-- Every line is expected to consist of a 32-character checksum, four
-- characters that are skipped (md5sum's two-character separator plus the
-- @./@ emitted by @find@), and the path. The prefix @fp@ is prepended to the
-- path and the result is quoted for use as a single shell word.
--
-- Lines that are too short to hold a full checksum, or that carry no path,
-- are skipped instead of producing bogus entries. When a checksum occurs
-- more than once, the path of its last occurrence is kept (the behaviour of
-- 'Map.fromList').
--
-- NOTE(review): md5sum marks lines whose file name contains a backslash or
-- newline with a leading @\\@ and escapes the name; such lines are not
-- decoded here.
mkMap :: FilePath -> String -> Map String String
mkMap fp contents =
  Map.fromList
    [ (digest, shellQuote (fp ++ path))
    | entry <- lines contents
    , let (digest, rest) = splitAt digestLen entry
    , length digest == digestLen
    , let path = drop separatorLen rest
    , not (null path)
    ]
  where
    -- Length of the checksum in characters; change for another sum.
    digestLen :: Int
    digestLen = 32

    -- Characters between the checksum and the path proper.
    separatorLen :: Int
    separatorLen = 4

    -- Single quotes keep the shell from expanding @$@, backticks, double
    -- quotes and backslashes inside the path.
    shellQuote :: String -> String
    shellQuote s = "'" ++ concatMap escape s ++ "'"

    -- A single quote cannot appear inside single quotes, so close the
    -- quoting, emit an escaped quote, and reopen it.
    escape :: Char -> String
    escape '\'' = "'\\''"
    escape c = [c]
-- | Read both checksum listings and print one @ln -sf@ command for every
-- checksum that occurs in both of them.
--
-- Each command links the path from 'f1' to the path from 'f2'. The commands
-- are only written to standard output; nothing is linked by this program.
main :: IO ()
main = do
  sums1 <- fmap (mkMap p1) (UTF8.readFile f1)
  sums2 <- fmap (mkMap p2) (UTF8.readFile f2)
  -- Keep only checksums present in both listings, pairing the path from f1
  -- (the file to replace) with the path from f2 (the link target).
  let duplicates = Map.elems (Map.intersectionWith (,) sums1 sums2)
  forM_ duplicates $ \(old, new) ->
    UTF8.putStrLn ("ln -sf " ++ new ++ " " ++ old)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment