Skip to content

Instantly share code, notes, and snippets.

@arademaker
Last active August 10, 2020 23:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arademaker/b88311adafd32e2bda3a0f2b1be5a9b4 to your computer and use it in GitHub Desktop.
Save arademaker/b88311adafd32e2bda3a0f2b1be5a9b4 to your computer and use it in GitHub Desktop.
merge two files
import Data.List
import System.FilePath.Posix
-- Plan:
--   1. Read each file into a list of lines (each file is assumed to hold an ordered list of pathnames).
--   2. Convert each pathname into an Obj.
--   3. Merge the two lists of Objs.
--   4. Print the merged result.
-- An alternative would be to use a multiset:
-- http://hackage.haskell.org/package/multiset-0.2.2/docs/Data-MultiSet.html
-- | Sample pathname in the OntoNotes release layout, handy for trying the
-- functions below in GHCi.
line1 :: String
line1 = "ontonotes-release-5.0/data/files/data/english/annotations/bc/cctv/00/cctv_0000.parse"

-- | Sample pathname in the gold CoNLL layout with the same base name as 'line1'.
line2 :: String
line2 = "data/ontonotes/bc/cctv/00/cctv_0000.gold_conll"
-- | Break a string into the pieces found between occurrences of a delimiter.
--
-- The delimiter itself is dropped. The result always holds at least one
-- piece: input without the delimiter comes back as a single-element list,
-- and leading, trailing or adjacent delimiters yield empty pieces.
split :: Char -> String -> [String]
split delim str
  | null rest = [piece]
  | otherwise = piece : split delim (drop 1 rest) -- drop 1 skips the delimiter
  where
    (piece, rest) = break (== delim) str
-- | A pathname broken into the pieces used when merging file lists.
data Obj = Obj
  { path      :: String -- ^ set by 'path2obj' to the directory part of the pathname
  , name      :: String -- ^ set by 'path2obj' to the base file name; the key used for equality and ordering
  , extension :: String -- ^ set by 'path2obj' to the file extension
  }
  deriving (Show)
-- | Two objects are equal when their 'name' fields match; 'path' and
-- 'extension' take no part in the comparison.
instance Eq Obj where
  Obj { name = lhs } == Obj { name = rhs } = lhs == rhs
-- | Objects are ordered by their 'name' field alone, in line with the 'Eq'
-- instance.
--
-- Only 'compare' is defined; the other 'Ord' methods use the class defaults,
-- which are derived from 'compare', so they cannot drift out of agreement
-- with it.
instance Ord Obj where
  compare lhs rhs = compare (name lhs) (name rhs)
-- | Build an 'Obj' from a pathname.
--
-- 'takeDirectory' supplies 'path', 'takeBaseName' supplies 'name' and
-- 'takeExtension' supplies 'extension'.
path2obj :: String -> Obj
path2obj fp =
  Obj
    { path = takeDirectory fp
    , name = takeBaseName fp
    , extension = takeExtension fp
    }
-- | Merge two lists of objects into aligned pairs, matching on the 'Ord'
-- instance of 'Obj' (which compares 'name' only).
--
-- Both lists are consumed front to back in a single pass, so the alignment
-- is only meaningful when each list is already in ascending order.
--
-- Every result pair is one of:
--
-- * @(Just x, Just y)@ when the two heads compare equal;
-- * @(Just x, Nothing)@ when @x@ sorts before the head of the second list,
--   or the second list is exhausted;
-- * @(Nothing, Just y)@ when @y@ sorts before the head of the first list,
--   or the first list is exhausted.
join :: [Obj] -> [Obj] -> [(Maybe Obj, Maybe Obj)]
join xs [] = [(Just x, Nothing) | x <- xs] -- also covers both lists empty
join [] ys = [(Nothing, Just y) | y <- ys]
join xxs@(x : xs) yys@(y : ys) =
  -- A single 'compare' keeps the match exhaustive and avoids up to three
  -- separate comparisons per step.
  case compare x y of
    EQ -> (Just x, Just y) : join xs ys
    LT -> (Just x, Nothing) : join xs yys
    GT -> (Nothing, Just y) : join xxs ys
-- | Render one merged pair as three fields joined by @sep@.
--
-- The fields are, in order: the object's 'name', the 'path' of the left
-- object, and the 'path' of the right object. A missing side is written as
-- @"-"@. The name comes from the left object when present, otherwise from
-- the right one. A pair with both sides missing renders as three @"-"@
-- placeholders, which keeps the function total.
print_pair :: String -> (Maybe Obj, Maybe Obj) -> String
print_pair sep (left, right) = intercalate sep [label, side left, side right]
  where
    -- Path of one side, or the placeholder when that side is absent.
    side = maybe "-" path
    -- Prefer the left object's name; fall back to the right, then to "-".
    label = maybe (maybe "-" name right) name left
-- | Read the two pathname lists, merge them and print one line per merged
-- entry.
--
-- The inputs are the files @ont-list@ and @propbank-list@, each holding one
-- pathname per line. The merge in 'join' expects both lists to already be
-- ordered by base name. Output fields are separated by a single space.
main :: IO ()
main = do
  ontText <- readFile "ont-list"
  propText <- readFile "propbank-list"
  let ontObjs = map path2obj (lines ontText)
      propObjs = map path2obj (lines propText)
  -- mapM_ rather than mapM: only the printing matters, so no list of unit
  -- results is built up and main keeps the conventional type IO ().
  mapM_ (putStrLn . print_pair " ") (join ontObjs propObjs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment