Created
March 18, 2022 18:07
-
-
Save jarnaldich/64df23f9c6e60bcc2625a37ba4b07053 to your computer and use it in GitHub Desktop.
XML to JSON in Haskell sample #haskell #json #xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env stack | |
{- | |
stack | |
--install-ghc runghc | |
--package aeson | |
--package aeson-pretty | |
--package lens-aeson | |
--package xml-lens | |
-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE ScopedTypeVariables #-} | |
module Main where | |
import System.Environment (getArgs) | |
import Data.Text as T | |
import Data.Text.IO as T | |
import Data.ByteString.Lazy.Char8 as B | |
import Data.Vector as V | |
import Data.List as List | |
import System.Exit | |
import System.FilePath (takeBaseName, takeFileName) | |
import Debug.Trace as Dbg | |
import Data.Aeson (Value(..), encode, decode) | |
import Data.Aeson.Lens | |
import Data.Aeson.Encode.Pretty (encodePretty) | |
import Text.XML | |
import Text.XML.Lens | |
dstId = _Object .ix "id" | |
dataIdentification = root | |
. ell "MD_Metadata" | |
./ ell "identificationInfo" | |
./ ell "MD_DataIdentification" | |
citation = dataIdentification | |
./ ell "Citation" | |
./ ell "CI_Citation" | |
idLens = citation | |
./ ell "identifier" | |
./ ell "RS_Identifier" | |
./ ell "code" | |
./ ell "CharacterString" | |
. text | |
properties field = _Object . ix "properties" . _Object . ix field | |
idTemporal beginOrEnd = dataIdentification | |
./ ell "extent" | |
./ ell "EX_Extent" | |
./ ell "temporalElement" | |
./ ell "EX_TemporalExtent" | |
./ ell "extent" | |
./ ell "TimePeriod" | |
./ ell beginOrEnd | |
./ ell "TimeInstant" | |
./ ell "timePosition" | |
. text | |
_bbox = _Object . ix "bbox" | |
coordinates = _Object . ix "geometry" . _Object . ix "coordinates" -- . _Array 0 | |
extent bound = dataIdentification | |
./ ell "extent" | |
./ ell "EX_Extent" | |
./ ell "geographicElement" | |
./ ell "EX_GeographicBoundingBox" | |
./ ell bound | |
./ ell "Decimal" | |
. text | |
title = citation ./ ell "title" ./ ell "CharacterString" . text | |
abstract = dataIdentification ./ ell "abstract" ./ ell "CharacterString" . text | |
_links pos key = _Object . ix "links" . _Array . ix pos . _Object . ix key . _String | |
_assets asset key = _Object . ix "assets" . _Object . ix asset . _Object . ix key . _String | |
getStacJson :: T.Text -> T.Text -> T.Text -> T.Text -> String -> String -> IO Value | |
getStacJson root rgbOrIrc bits date jsonFile xmlFile = do | |
jsonBS <- B.readFile jsonFile | |
xml <- Text.XML.readFile def xmlFile | |
let jsonBaseName = T.pack $ takeBaseName xmlFile | |
let coverage = "sen2" <> rgbOrIrc <> bits <> "b" | |
let str l = (String $ xml ^. l) | |
let Just json = decode jsonBS :: Maybe Value | |
-- Dbg.traceIO $ show $ json ^? _links 2 "href" | |
let num l = (Number $ read $ T.unpack $ xml ^. l) | |
let bbox = V.map (num . extent) $ V.fromList [ "westBoundLongitude" | |
, "southBoundLatitude" | |
, "eastBoundLongitude" | |
, "northBoundLatitude" ] | |
let alist = Array . V.fromList | |
let mkPoint p = alist $ List.map ((bbox V.!)) p | |
let coords = alist [ | |
alist $ List.map mkPoint [ [ 2, 1 ] | |
, [ 0, 1 ] | |
, [ 0, 3 ] | |
, [ 2, 3 ] ] | |
] | |
return $ json & dstId .~ (String $ coverage <> "_" <> date) -- str idLens | |
& properties "start_datetime" .~ (String $ (xml ^. idTemporal "begin") <> "T00:00Z") | |
& properties "end_datetime" .~ (String $ (xml ^. idTemporal "end") <> "T00:00Z") | |
& properties "title" .~ String date | |
& properties "description" .~ str title | |
& _bbox .~ Array bbox | |
& _links 0 "href" .~ root <> "/STAC/catalog.json" | |
& _links 1 "href" .~ root <> "/STAC/" <> coverage <> "/collection_" <> coverage <> ".json" | |
& _links 2 "href" .~ root <> "/STAC/" <> coverage <> "/item/" <> jsonBaseName <> ".json" | |
& _links 2 "title".~ date | |
& _links 1 "title" .~ "collection_" <> coverage | |
& _assets "visual" "title" .~ "imatge " <> rgbOrIrc <> " de " <> bits <> " bits" | |
& _assets "visual" "href" .~ "../item/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.tif" | |
& _assets "thumbnail" "href" .~ "../thumbnail/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.tif" | |
& _assets "iso19115" "href" .~ "../data/" <> coverage <> "v10tf0f01ss1_" <> date <> "_0.xml" | |
& coordinates .~ coords | |
substr :: Int -> Int -> [a] -> [a] | |
substr from to xs = List.take (to - from + 1) (List.drop from xs) | |
main = do | |
[root, jsonFile, srcXmlFile] <- getArgs -- Gross... | |
let baseName = takeBaseName srcXmlFile | |
let date = T.pack $ List.takeWhile (/= '_') $ List.tail $ List.dropWhile (/= '_') $ baseName | |
rgbOrIrc <- case (substr 4 6 baseName) of | |
"rgb" -> return $ T.pack "rgb" | |
"irc" -> return $ T.pack "irc" | |
e -> do | |
Prelude.putStrLn $ "Error obtenint rgb/irc de "<> e <> " de: " <> (takeBaseName srcXmlFile ) | |
exitWith (ExitFailure 42) | |
bits <- case (substr 7 9 baseName) of | |
"8bv" -> return $ T.pack "8" | |
"16b" -> return $ T.pack "16" | |
e -> do | |
Prelude.putStrLn $ "Error obtenint bits"<> e<> " de: " <> srcXmlFile | |
exitWith (ExitFailure 42) | |
-- let date = T.pack (substr 19 24 baseName) | |
modified <- getStacJson (T.pack root) rgbOrIrc bits date jsonFile srcXmlFile | |
-- Dbg.traceIO $ show $ (Number $ read $ T.unpack $ xml ^. (extent "westBoundLongitude")) | |
-- T.putStrLn $ T.pack $ show json | |
B.putStrLn $ encodePretty modified |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment