Skip to content

Instantly share code, notes, and snippets.

@mistidoi
Created July 25, 2016 00:31
Show Gist options
  • Save mistidoi/6a5bc7951cc2c75049c99c44f957e299 to your computer and use it in GitHub Desktop.
Save mistidoi/6a5bc7951cc2c75049c99c44f957e299 to your computer and use it in GitHub Desktop.
bcp2csv in Haskell (with Lazy ByteStrings)
import Data.List
import System.Environment

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy.Char8 as C
import qualified Data.ByteString.Lazy.Search as S
import Data.List.Split
import Data.List.Utils
-- | Entry point: converts a BCP export into a CSV file.
--
-- usage: $ ./bcp2csv input.bcp output.csv
--
-- The first argument is the BCP file to read and the second is the CSV file
-- to write; any further arguments are ignored. When fewer than two arguments
-- are supplied the program fails with a usage message instead of crashing on
-- a partial list access.
main :: IO ()
main = do
  args <- getArgs
  case args of
    (inputFilename : outputFilename : _) -> do
      -- The input is a lazy ByteString, so records are converted as they are read.
      input <- C.readFile inputFilename
      C.writeFile outputFilename . C.unlines . map joinWithCommas $ parseAndEscapeBCP input
    _ -> ioError (userError "usage: bcp2csv input.bcp output.csv")
-- | A single field (cell) of a record, held as a lazy ByteString.
type Field = C.ByteString
-- | One record of the file: the list of its fields, in order.
type Line = [Field]
-- | Parse BCP input with 'parseBCP' and apply 'escapeField' to every field
-- of every record, keeping the nested lines-of-fields structure.
parseAndEscapeBCP :: C.ByteString -> [Line]
parseAndEscapeBCP = map escapeLine . parseBCP
  where
    -- Escape each field of a single record.
    escapeLine = map escapeField
-- | Parse BCP input into nested lists: the outer list holds the records
-- (lines) and each inner list holds that record's fields.
parseBCP :: C.ByteString -> [Line]
parseBCP = map splitFields . splitLines
-- | Make a single field safe for CSV output: embedded quotes are escaped
-- first, then the result is wrapped in quotes if it needs them.
escapeField :: Field -> Field
escapeField field = quoteIfNecessary (escapeQuotes field)
-- | Split the raw input into records on the @&$&@ record separator.
splitLines :: C.ByteString -> [C.ByteString]
splitLines = S.split recordSeparator
  where
    recordSeparator = convertToStrictByteString "&$&"
-- | Split one record into its fields on the @#&#@ field separator.
splitFields :: C.ByteString -> Line
splitFields = S.split fieldSeparator
  where
    fieldSeparator = convertToStrictByteString "#&#"
-- | Pack a 'String' into a strict ByteString, the pattern type used by the
-- search functions in this file for 'S.split' and 'S.replace'.
--
-- Characters are packed via the Char8 interface, so this is intended for
-- ASCII separator literals.
convertToStrictByteString :: String -> B.ByteString
convertToStrictByteString = C.toStrict . C.pack
-- | Escape embedded double quotes for CSV by doubling them (@"@ becomes
-- @""@). Fields without a quote are returned untouched.
--
-- Each quote must become exactly two quotes: once the field is wrapped in
-- quotes, a tripled quote would be read as an escaped quote followed by the
-- closing quote, ending the field early.
escapeQuotes :: Field -> Field
escapeQuotes input
  | '"' `C.elem` input = S.replace (convertToStrictByteString "\"") (C.pack "\"\"") input
  | otherwise = input
-- | Wrap a field in double quotes when it contains a character that is
-- special in CSV: a comma, a double quote, or a line break (LF or CR).
-- Any other field is returned unchanged.
quoteIfNecessary :: Field -> Field
quoteIfNecessary input
  | C.any needsQuoting input = wrapInQuotes input
  | otherwise = input
  where
    -- A bare carriage return is included because many CSV readers treat it
    -- as a record break, just like a line feed.
    needsQuoting c = c == '\n' || c == '\r' || c == ',' || c == '"'
-- | Surround a field with one leading and one trailing double quote.
wrapInQuotes :: Field -> Field
wrapInQuotes input = C.cons quote (C.snoc input quote)
  where
    quote = '"'
-- | Render one record as a CSV row by placing a comma between its fields.
joinWithCommas :: Line -> C.ByteString
joinWithCommas = C.intercalate comma
  where
    comma = C.singleton ','
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment