Skip to content

Instantly share code, notes, and snippets.

@ColinMaudry
Last active January 23, 2018 15:44
Show Gist options
  • Save ColinMaudry/5169cb4e285ca94a160272b7b59a5411 to your computer and use it in GitHub Desktop.
Save ColinMaudry/5169cb4e285ca94a160272b7b59a5411 to your computer and use it in GitHub Desktop.
Conversion des données XML du BOAMP vers JSON-LD
{
"@context": {
"@vocab": "https://data.maudry.com/rdf/boamp#",
"boamp": "https://data.maudry.com/voc/boamp#",
"dct": "http://purl.org/dc/terms/",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
"attributions": "https://boamp.maudry.com/attributions/",
"appeloffres": "https://boamp.maudry.com/appeloffres/",
"acheteurs": "https://boamp.maudry.com/acheteurs/",
"titulaires": "https://boamp.maudry.com/titulaires/",
"descripteurs": "https://boamp.maudry.com/descripteurs/",
"lots": "https://boamp.maudry.com/lots/",
"cpv": "https://boamp.maudry.com/cpv/",
"annonce": {
"@id": "boamp:annonce",
"@type": "rdfs:Class",
"rdfs:label": "Annonce"
},
"appel-offre": {
"@id": "boamp:appel-offre",
"@type": "rdfs:Class",
"rdfs:label": "Appel d'offres",
"rdfs:subClassOf": "boamp:annonce"
},
"attribution": {
"@id": "boamp:attribution",
"@type": "rdfs:Class",
"rdfs:label": "Attribution"
},
"Acheteur": {
"@id": "boamp:Acheteur",
"@type": "rdfs:Class",
"rdfs:label": "Acheteur"
},
"Cpv": {
"@id": "boamp:Cpv",
"@type": "rdfs:Class",
"rdfs:label": "Code CPV"
},
"acheteur": {
"@id": "boamp:acheteur",
"@type": "@id",
"rdfs:label": "Acheteur"
},
"Descripteur": {
"@id": "boamp:Descripteur",
"@type": "rdfs:Class",
"rdfs:label": "Descripteur"
},
"Lot": {
"@id": "boamp:Lot",
"@type": "rdfs:Class",
"rdfs:label": "Lot"
},
"etat": {
"@id": "boamp:etat",
"@type": "rdfs:Class",
"rdfs:label": "État"
},
"region": {
"@id": "boamp:region",
"@type": "rdfs:Class",
"rdfs:label": "Région"
},
"departement": {
"@id": "boamp:departement",
"@type": "rdfs:Class",
"rdfs:label": "Département"
},
"commune": {
"@id": "boamp:commune",
"@type": "rdfs:Class",
"rdfs:label": "Commune"
}
}
}
#!/bin/bash
#
# Converts a directory of BOAMP XML notices to JSON-LD in three steps:
#   1. every DIRECTORY/*.xml is converted to DIRECTORY/<name>.xml.json,
#   2. each JSON file is moved into the DIRECTORY sub-folder named after the
#      first "NATURE" key found in it,
#   3. the files in APPEL_OFFRE and ATTRIBUTION are run through their jq
#      filter, producing <name>.xml.json.jsonld next to each JSON file.
#
# Usage: <this script> DIRECTORY
#
# Resolved relative to the current working directory: ./xml2json,
# filtre-appel-offres.jq and filtre-attribution.jq. jq must be on PATH and
# grep must support -P (GNU grep).
# xml2json = https://github.com/Cheedoong/xml2json

dir=${1:?usage: $0 DIRECTORY}

# Let unmatched globs expand to nothing, so an empty folder yields zero
# iterations / a count of 0 instead of the literal pattern.
shopt -s nullglob

#######################################
# Prints how many files in a directory carry a given extension.
# Arguments: $1 - directory, $2 - extension without the leading dot
# Outputs:   the count on stdout
#######################################
count_files() {
  local matches=("$1"/*."$2")
  echo "${#matches[@]}"
}

#######################################
# Runs a jq filter over every *.json file of a directory, writing the result
# to <file>.jsonld, then prints the success/failure totals.
# Arguments: $1 - jq filter file, $2 - directory holding the *.json files
# Outputs:   "Success: N" and "Failed: M" on stdout, jq errors on stderr
#######################################
convert_to_jsonld() {
  local filter=$1 src_dir=$2
  local total=0 ok=0 file
  for file in "$src_dir"/*.json; do
    total=$((total + 1))
    if jq -f "$filter" "$file" > "$file.jsonld"; then
      ok=$((ok + 1))
    else
      # The redirection creates the output even when jq fails; drop it so a
      # failed conversion does not look like a result.
      rm -f -- "$file.jsonld"
    fi
  done
  echo "Success: $ok"
  echo "Failed: $((total - ok))"
}

xml_files=("$dir"/*.xml)
count=${#xml_files[@]}
echo "$count XML files to process."
echo ""
echo "Converting XML files to JSON..."
for file in "${xml_files[@]}"; do
  if ! ./xml2json "$file" > "$file.json" || [[ ! -s "$file.json" ]]; then
    echo "xml2json failed for $file" >&2
    rm -f -- "$file.json"
  fi
done

echo "Categorizing files by nature..."
mkdir -p -- "$dir/ATTRIBUTION" "$dir/APPEL_OFFRE" \
  "$dir/INTENTION_CONCLURE" "$dir/RECTIFICATIF"
for file in "$dir"/*.json; do
  nature=$(grep -oP '(?<="NATURE":{")[A-Z_]*(?=")' -- "$file" | head -n 1)
  # Files without a recognisable nature stay in place and are reported in
  # the "Failed" total below.
  if [[ -z "$nature" || ! -d "$dir/$nature" ]]; then
    echo "Cannot categorize $file (nature: '${nature:-unknown}')" >&2
    continue
  fi
  mv -- "$file" "$dir/$nature/" || echo "Could not move $file" >&2
done

countAttri=$(count_files "$dir/ATTRIBUTION" json)
countAppel=$(count_files "$dir/APPEL_OFFRE" json)
countIntention=$(count_files "$dir/INTENTION_CONCLURE" json)
countRecti=$(count_files "$dir/RECTIFICATIF" json)
success=$((countAttri + countAppel + countIntention + countRecti))
failed=$((count - success))
echo ""
echo "Attributions: $countAttri"
echo "Appels d'offres: $countAppel"
echo "Intentions de conclure: $countIntention"
echo "Rectificatifs: $countRecti"
echo ""
echo "Success: $success"
echo "Failed: $failed"
echo ""
echo "Converting appel d'offres to JSON-LD..."
convert_to_jsonld filtre-appel-offres.jq "$dir/APPEL_OFFRE"
echo ""
echo "Converting attributions to JSON-LD..."
convert_to_jsonld filtre-attribution.jq "$dir/ATTRIBUTION"
# Turns one BOAMP call-for-tender notice (JSON produced from the XML, rooted
# at .ANNONCE) into a JSON-LD node.
# Input : object with .ANNONCE.GESTION (reference, indexing) and
#         .ANNONCE.DONNEES (buyer, object of the contract, procedure).
# Output: one JSON-LD object identified as "appeloffres:<IDWEB>".
. |
# Bottom-up traversal: applies f to every value after its children have been
# rewritten. Defined locally so the filter does not depend on a builtin walk.
def walk(f):
  . as $in
  | if type == "object" then
      reduce keys[] as $key
        ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
    elif type == "array" then map( walk(f) ) | f
    else f
    end;
# One CPV entry (an object with a PRINCIPAL code) as a typed node.
def cpv_node:
  {
    "@id": ("cpv:" + .PRINCIPAL),
    "rdfs:label": .PRINCIPAL,
    "@type": "boamp:Cpv"
  };
# A CPV value may be a single object or an array of them; anything else
# (including a missing value) maps to null.
def cpv_principal:
  if type == "object" then cpv_node
  elif type == "array" then map(cpv_node)
  else null
  end;
.ANNONCE as $annonce |
.ANNONCE.GESTION.REFERENCE as $reference |
.ANNONCE.DONNEES as $donnees |
$donnees.OBJET as $objet |
$reference.IDWEB as $idweb |
# The nature, family and status are encoded as the single key of an object.
$reference.TYPE_AVIS.NATURE | keys[0] | ascii_downcase as $type |
{
  "@context": "https://gist.githubusercontent.com/ColinMaudry/5169cb4e285ca94a160272b7b59a5411/raw/c05b569bc7c37297c1f91424d58b99bf325f7000/boamp-context.jsonld",
  # NOTE(review): this yields e.g. "boamp:appel_offre" (underscore) while the
  # published context declares "boamp:appel-offre" (hyphen) - confirm which
  # spelling is intended.
  "@type": ("boamp:" + $type),
  "@id": ("appeloffres:" + $idweb),
  "boamp:idweb": $idweb,
  "dct:identifier": $objet.REF_MARCHE,
  "boamp:famille": $reference.TYPE_AVIS.FAMILLE | keys[0] | ascii_downcase,
  "boamp:statut": $reference.TYPE_AVIS.STATUT | keys[0] | ascii_downcase,
  "boamp:nomHtml": $annonce.GESTION.NOM_HTML,
  "boamp:cpvPrincipal": ($objet.CPV | cpv_principal),
  "boamp:debutDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_PUBLICATION,
    "@type": "xsd:date"
  },
  "boamp:finDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_FIN_DIFFUSION,
    "@type": "xsd:date"
  },
  # Each descriptor object is reduced to a reference built from its CODE;
  # arrays of descriptors end up as arrays of such references.
  "boamp:descripteur": $annonce.GESTION.INDEXATION.DESCRIPTEURS.DESCRIPTEUR |
    walk(if type == "object" then {"@id": ("descripteurs:" + .CODE)} elif type == "array" then map({"@id": (.["@id"])}) else . end),
  "boamp:lot": (
    $objet.LOTS.LOT | if type == "array" then
      map(
        {
          # Lot ids are scoped by the contract reference, falling back to the
          # notice id; non-word characters of the lot number become "_".
          "@id": ("lots:" + ($objet.REF_MARCHE // $idweb) + "_" + (.NUM | gsub("\\W";"_"))),
          "@type": "boamp:Lot",
          "rdfs:label": .INTITULE,
          "boamp:cpvPrincipal": (.CPV | cpv_principal)
        }
      )
    elif type == "object" then
      # A single lot is only referenced by id.
      {
        "@id": ("lots:" + ($objet.REF_MARCHE // $idweb) + "_" + (.NUM | gsub("\\W";"_")))
      }
    else null
    end ),
  #"boamp:criteresSociauxEnv": ,
  #"boamp:departementPublication": "27",
  #"boamp:resumeObjet":"Pré du Bel Ebat - Accès pompiers SMAC : Plantations. Pré du Bel Ebat à Evreux"
  "boamp:typeProcedure": (if ($donnees.PROCEDURE.TYPE_PROCEDURE | type) == "object" then
      ($donnees.PROCEDURE.TYPE_PROCEDURE | keys[0] | ascii_downcase)
    else
      null end),
  "boamp:eligibleMps": (if ($donnees.CONDITION_PARTICIPATION.ELIGIBLE_MPS | type) == "object" then
      ($donnees.CONDITION_PARTICIPATION.ELIGIBLE_MPS | keys[0])
    else
      null end),
  "boamp:acheteur": {
    # The buyer id is its name with every non-word character replaced by "-".
    "@id": ("acheteurs:" +
      ($donnees.IDENTITE.DENOMINATION |
      gsub("\\W";"-") )),
    "boamp:profilAcheteur": (
      if ($donnees.IDENTITE.URL_PROFIL_ACHETEUR | type) == "string" then
        {"@id": $donnees.IDENTITE.URL_PROFIL_ACHETEUR} else null end)
    ,
    "rdfs:label": $donnees.IDENTITE.DENOMINATION,
    "boamp:codePostal": $donnees.IDENTITE.CP,
    # The buyer type is the key of TYPE_POUVOIR_ADJUDICATEUR, or the free-text
    # value stored under AUTRE; without that element the generic class is used.
    "@type": (if ($donnees.TYPE_POUVOIR_ADJUDICATEUR | type) == "object" then
        ("boamp:" + ($donnees.TYPE_POUVOIR_ADJUDICATEUR | keys[0] |
          if . == "AUTRE" then
            $donnees.TYPE_POUVOIR_ADJUDICATEUR.AUTRE | ascii_downcase | gsub("\\W";"-") else
            . | ascii_downcase end)
        ) else
        "boamp:Acheteur" end)
  },
  "boamp:objetComplet": $objet.OBJET_COMPLET,
  "boamp:titreMarche": $objet.TITRE_MARCHE
  #"boamp:criteres":{},
}
# Turns one BOAMP award notice (JSON produced from the XML, rooted at
# .ANNONCE) into a JSON-LD node.
# Input : object with .ANNONCE.GESTION (reference, indexing, earlier notices)
#         and .ANNONCE.DONNEES (buyer, object, award decision).
# Output: one JSON-LD object identified as "<nature>s:<IDWEB>".
. |
# Bottom-up traversal: applies f to every value after its children have been
# rewritten. Defined locally so the filter does not depend on a builtin walk.
def walk(f):
  . as $in
  | if type == "object" then
      reduce keys[] as $key
        ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
    elif type == "array" then map( walk(f) ) | f
    else f
    end;
.ANNONCE as $annonce |
.ANNONCE.GESTION.REFERENCE as $reference |
.ANNONCE.DONNEES as $donnees |
$donnees.ATTRIBUTION.DECISION as $decision |
$reference.IDWEB as $idweb |
# The nature, family and status are encoded as the single key of an object.
$reference.TYPE_AVIS.NATURE | keys[0] | ascii_downcase as $type |
{
  "@context": "https://gist.githubusercontent.com/ColinMaudry/5169cb4e285ca94a160272b7b59a5411/raw/c05b569bc7c37297c1f91424d58b99bf325f7000/boamp-context.jsonld",
  "@type": ("boamp:" + $type),
  "@id": ($type + "s:" + $idweb),
  "boamp:idweb": $idweb,
  "dct:identifier":"",
  "boamp:famille": $reference.TYPE_AVIS.FAMILLE | keys[0] | ascii_downcase,
  "boamp:statut": $reference.TYPE_AVIS.STATUT | keys[0] | ascii_downcase,
  # Links to the earlier notice(s); the element holds either one object or an
  # array of them.
  # NOTE(review): the id is the bare IDWEB, whereas call-for-tender nodes are
  # published as "appeloffres:<IDWEB>" - confirm whether a prefix is wanted.
  "boamp:appelOffres" : $annonce.GESTION.MARCHE.ANNONCE_ANTERIEUR |
    (if type == "array" then map({"@id":.REFERENCE.IDWEB})
    elif type == "object" then
      {"@id":.REFERENCE.IDWEB}
    else null
    end)
  ,
  "boamp:nomHtml": $annonce.GESTION.NOM_HTML,
  "boamp:debutDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_PUBLICATION,
    "@type": "xsd:date"
  },
  "boamp:finDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_FIN_DIFFUSION,
    "@type": "xsd:date"
  },
  # Each descriptor object is reduced to a reference built from its CODE;
  # arrays of descriptors end up as arrays of such references.
  "boamp:descripteur": $annonce.GESTION.INDEXATION.DESCRIPTEURS.DESCRIPTEUR |
    walk(if type == "object" then {"@id": ("descripteurs:" + .CODE)} elif type == "array" then map({"@id": (.["@id"])}) else . end),
  #"boamp:criteresSociauxEnv": ,
  #"boamp:departementPublication": "27",
  #"boamp:resumeObjet":"Pré du Bel Ebat - Accès pompiers SMAC : Plantations. Pré du Bel Ebat à Evreux"
  # The buyer is only emitted when it has a name, since the id is derived
  # from it (non-word characters replaced by "-").
  "boamp:acheteur": (
    if $donnees.IDENTITE.DENOMINATION then {
      "@id": ("acheteurs:" +
        ($donnees.IDENTITE.DENOMINATION |
        gsub("\\W";"-") )),
      "boamp:profilAcheteur": (
        if $donnees.IDENTITE.URL_PROFIL_ACHETEUR then {
          "@id": $donnees.IDENTITE.URL_PROFIL_ACHETEUR
        } else null end),
      "rdfs:label": $donnees.IDENTITE.DENOMINATION,
      "boamp:codePostal": $donnees.IDENTITE.CP,
      "@type": (if ($donnees.TYPE_ORGANISME | type) == "object" then
          ("boamp:" + ($donnees.TYPE_ORGANISME | keys[0] | ascii_downcase)) else
          "boamp:Acheteur" end)
    } else null end),
  "boamp:objetComplet": $donnees.OBJET.OBJET_COMPLET,
  #"boamp:criteres":{},
  "boamp:dateDecisionAttribution": $donnees.ATTRIBUTION.DATE_DECISION,
  # Only a single decision with a single contractor is described; several
  # decisions (array) or any other shape give null.
  "boamp:titulaire": (
    if ($decision | type) == "object" then
      if ($decision.TITULAIRE | type) == "object" then
        {
          "@id": ("titulaires:" + ($decision.TITULAIRE.DENOMINATION | gsub("\\W";"-")) + "-" + $decision.TITULAIRE.CP),
          "boamp:codePostal": $decision.TITULAIRE.CP
        }
      else null end
    else null end),
  # Amounts come as an object holding the currency under "@DEVISE" and the
  # figure under "#text".
  "boamp:valeurTotale": (if ($donnees.ATTRIBUTION.VALEUR_TOTALE | type) == "object" then {
      "boamp:devise":$donnees.ATTRIBUTION.VALEUR_TOTALE["@DEVISE"],
      "boamp:montant": (($donnees.ATTRIBUTION.VALEUR_TOTALE["#text"] | tonumber) // null)
    } else null end),
  "boamp:montantAttribue": (if ($decision | type) == "object" and ($decision.RENSEIGNEMENT.MONTANT | type) == "object" then {
      "boamp:devise":$decision.RENSEIGNEMENT.MONTANT["@DEVISE"],
      "boamp:montant": (($decision.RENSEIGNEMENT.MONTANT["#text"] | tonumber) // null)
    } else null end)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment