Skip to content

Instantly share code, notes, and snippets.

@jarnaldich
Created March 18, 2022 18:07
Show Gist options
  • Save jarnaldich/64df23f9c6e60bcc2625a37ba4b07053 to your computer and use it in GitHub Desktop.
Save jarnaldich/64df23f9c6e60bcc2625a37ba4b07053 to your computer and use it in GitHub Desktop.
XML to JSON in Haskell sample #haskell #json #xml
#!/usr/bin/env stack
{-
stack
--install-ghc runghc
--package aeson
--package aeson-pretty
--package lens-aeson
--package xml-lens
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main where
import System.Environment (getArgs)
import Data.Text as T
import Data.Text.IO as T
import Data.ByteString.Lazy.Char8 as B
import Data.Vector as V
import Data.List as List
import System.Exit
import System.FilePath (takeBaseName, takeFileName)
import Debug.Trace as Dbg
import Data.Aeson (Value(..), encode, decode)
import Data.Aeson.Lens
import Data.Aeson.Encode.Pretty (encodePretty)
import Text.XML
import Text.XML.Lens
dstId = _Object .ix "id"
dataIdentification = root
. ell "MD_Metadata"
./ ell "identificationInfo"
./ ell "MD_DataIdentification"
citation = dataIdentification
./ ell "Citation"
./ ell "CI_Citation"
idLens = citation
./ ell "identifier"
./ ell "RS_Identifier"
./ ell "code"
./ ell "CharacterString"
. text
properties field = _Object . ix "properties" . _Object . ix field
idTemporal beginOrEnd = dataIdentification
./ ell "extent"
./ ell "EX_Extent"
./ ell "temporalElement"
./ ell "EX_TemporalExtent"
./ ell "extent"
./ ell "TimePeriod"
./ ell beginOrEnd
./ ell "TimeInstant"
./ ell "timePosition"
. text
_bbox = _Object . ix "bbox"
coordinates = _Object . ix "geometry" . _Object . ix "coordinates" -- . _Array 0
extent bound = dataIdentification
./ ell "extent"
./ ell "EX_Extent"
./ ell "geographicElement"
./ ell "EX_GeographicBoundingBox"
./ ell bound
./ ell "Decimal"
. text
title = citation ./ ell "title" ./ ell "CharacterString" . text
abstract = dataIdentification ./ ell "abstract" ./ ell "CharacterString" . text
_links pos key = _Object . ix "links" . _Array . ix pos . _Object . ix key . _String
_assets asset key = _Object . ix "assets" . _Object . ix asset . _Object . ix key . _String
getStacJson :: T.Text -> T.Text -> T.Text -> T.Text -> String -> String -> IO Value
getStacJson root rgbOrIrc bits date jsonFile xmlFile = do
jsonBS <- B.readFile jsonFile
xml <- Text.XML.readFile def xmlFile
let jsonBaseName = T.pack $ takeBaseName xmlFile
let coverage = "sen2" <> rgbOrIrc <> bits <> "b"
let str l = (String $ xml ^. l)
let Just json = decode jsonBS :: Maybe Value
-- Dbg.traceIO $ show $ json ^? _links 2 "href"
let num l = (Number $ read $ T.unpack $ xml ^. l)
let bbox = V.map (num . extent) $ V.fromList [ "westBoundLongitude"
, "southBoundLatitude"
, "eastBoundLongitude"
, "northBoundLatitude" ]
let alist = Array . V.fromList
let mkPoint p = alist $ List.map ((bbox V.!)) p
let coords = alist [
alist $ List.map mkPoint [ [ 2, 1 ]
, [ 0, 1 ]
, [ 0, 3 ]
, [ 2, 3 ] ]
]
return $ json & dstId .~ (String $ coverage <> "_" <> date) -- str idLens
& properties "start_datetime" .~ (String $ (xml ^. idTemporal "begin") <> "T00:00Z")
& properties "end_datetime" .~ (String $ (xml ^. idTemporal "end") <> "T00:00Z")
& properties "title" .~ String date
& properties "description" .~ str title
& _bbox .~ Array bbox
& _links 0 "href" .~ root <> "/STAC/catalog.json"
& _links 1 "href" .~ root <> "/STAC/" <> coverage <> "/collection_" <> coverage <> ".json"
& _links 2 "href" .~ root <> "/STAC/" <> coverage <> "/item/" <> jsonBaseName <> ".json"
& _links 2 "title".~ date
& _links 1 "title" .~ "collection_" <> coverage
& _assets "visual" "title" .~ "imatge " <> rgbOrIrc <> " de " <> bits <> " bits"
& _assets "visual" "href" .~ "../item/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.tif"
& _assets "thumbnail" "href" .~ "../thumbnail/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.tif"
& _assets "iso19115" "href" .~ "../data/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.xml"
& coordinates .~ coords
substr :: Int -> Int -> [a] -> [a]
substr from to xs = List.take (to - from + 1) (List.drop from xs)
main = do
[root, jsonFile, srcXmlFile] <- getArgs -- Gross...
let baseName = takeBaseName srcXmlFile
let date = T.pack $ List.takeWhile (/= '_') $ List.tail $ List.dropWhile (/= '_') $ baseName
rgbOrIrc <- case (substr 4 6 baseName) of
"rgb" -> return $ T.pack "rgb"
"irc" -> return $ T.pack "irc"
e -> do
Prelude.putStrLn $ "Error obtenint rgb/irc de "<> e <> " de: " <> (takeBaseName srcXmlFile )
exitWith (ExitFailure 42)
bits <- case (substr 7 9 baseName) of
"8bv" -> return $ T.pack "8"
"16b" -> return $ T.pack "16"
e -> do
Prelude.putStrLn $ "Error obtenint bits"<> e<> " de: " <> srcXmlFile
exitWith (ExitFailure 42)
-- let date = T.pack (substr 19 24 baseName)
modified <- getStacJson (T.pack root) rgbOrIrc bits date jsonFile srcXmlFile
-- Dbg.traceIO $ show $ (Number $ read $ T.unpack $ xml ^. (extent "westBoundLongitude"))
-- T.putStrLn $ T.pack $ show json
B.putStrLn $ encodePretty modified
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment