Skip to content

Instantly share code, notes, and snippets.

@bwbroersma
Last active February 22, 2022 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bwbroersma/152c88f5919a72aaad85efaf84498719 to your computer and use it in GitHub Desktop.
Save bwbroersma/152c88f5919a72aaad85efaf84498719 to your computer and use it in GitHub Desktop.
How to convert EMLNL candidate lists to CSV and JSON
#!/bin/bash
# Convert EML_NL candidate lists (*/K*.eml.xml) into one semicolon-separated
# CSV file and one nested JSON file.
#
# c2j can be found here https://github.com/bwbroersma/csv2jsonl/ or be replaced with the slower csvjson from csvkit, use --stream! (https://csvkit.readthedocs.io/en/latest/scripts/csvjson.html)
# Add -C to the xmlstarlet call to print the generated XSLT instead of running it.

# UTC timestamp with minute precision; the "+00:00" suffix is dropped and the
# first ":" becomes "." (e.g. 2022-02-22T19.34).
TS="$(date -u -Iminute|sed 's/+00:00//g;s/:/./')"
CSV="GR2022_alle-kandidaten_$TS.csv";
JSON="GR2022_alle-kandidaten_$TS.json";

# The subshell emits a UTF-8 BOM plus the header row, followed by one row per
# candidate produced by xmlstarlet; sed then turns every LF into CRLF.
(echo -e "\xEF\xBB\xBFGemeenteCode;GemeenteNaam;LijstNummer;LijstNaam;DocumentTaal;PublicatieGeslacht;KandidaatNummer;Initialen;Roepnaam;Tussenvoegsel;Achternaam;Woonplaats;Land;Geslacht";
# Template outline (each -b closes the innermost open --var/--if/-m):
#   * listName is the registered name (wrapped in double quotes, with embedded
#     quotes doubled, when it contains a double quote); lists without a
#     registered name become "Blanco (<last name>, <initials>)" of candidate 1.
#   * Land is the first two characters of CountryNameCode, or "NL" when absent.
#   * Geslacht is the first letter of Gender with f -> v and u -> ?.
# Every continuation backslash is preceded by a space so arguments never fuse.
# The country lookup uses the xAL prefix declared with -N below: prefixes are
# case-sensitive, and a lowercase "xal" would only resolve if the input
# document itself happened to declare it.
xmlstarlet sel \
    -N eml="urn:oasis:names:tc:evs:schema:eml" \
    -N kr="http://www.kiesraad.nl/extensions" \
    -N xNL="urn:oasis:names:tc:ciq:xsdschema:xNL:2.0" \
    -N xAL="urn:oasis:names:tc:ciq:xsdschema:xAL:2.0" \
    -T -t \
    --var regionCode='string(/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/@Id)' \
    --var regionName='/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/text()' \
    -m '/eml:EML/eml:CandidateList/eml:Election/eml:Contest/eml:Affiliation' \
        --var listId='number(eml:AffiliationIdentifier/@Id)' \
        --var listName \
            --if 'eml:AffiliationIdentifier/eml:RegisteredName/text()' \
                --if "contains(eml:AffiliationIdentifier/eml:RegisteredName/text(),'\"')" \
                    -c "concat('\"',str:replace(eml:AffiliationIdentifier/eml:RegisteredName/text(),'\"','\"\"'),'\"')" \
                --else \
                    -c 'eml:AffiliationIdentifier/eml:RegisteredName/text()' \
                -b \
            --else \
                --var candidatePersonName='./eml:Candidate[eml:CandidateIdentifier/@Id=1]/eml:CandidateFullName/xNL:PersonName' \
                --var candidateLastName \
                    --if '$candidatePersonName/xNL:NamePrefix' \
                        -c 'concat($candidatePersonName/xNL:NamePrefix/text()," ",$candidatePersonName/xNL:LastName/text())' \
                    --else \
                        -c '$candidatePersonName/xNL:LastName/text()' \
                    -b \
                -b \
                -c 'concat("Blanco (",$candidateLastName,", ",$candidatePersonName/xNL:NameLine[@NameType="Initials"]/text(),")")' \
            -b \
        -b \
        --var publicationLanguage='kr:ListData/@PublicationLanguage' \
        --var publishGender \
            --if 'kr:ListData[@PublishGender="true"]' \
                -o 'ja' \
            --else \
                -o 'nee' \
            -b \
        -b \
        --var prefix='concat($regionCode,";",$regionName,";",$listId,";",$listName,";",$publicationLanguage,";",$publishGender,";")' \
        -m 'eml:Candidate' \
            --var candidatePersonName='eml:CandidateFullName/xNL:PersonName' \
            -c 'concat($prefix,string(eml:CandidateIdentifier/@Id),";",$candidatePersonName/xNL:NameLine[@NameType="Initials"]/text(),";",$candidatePersonName/xNL:FirstName/text(),";",$candidatePersonName/xNL:NamePrefix/text(),";",$candidatePersonName/xNL:LastName/text(),";",eml:QualifyingAddress//xAL:Locality/xAL:LocalityName/text(),";",substring(concat(.//xAL:CountryNameCode,"NL"),1,2),";",translate(substring(eml:Gender,1,1),"fu","v?"))' \
            -n \
        -b \
    -b \
    */K*.eml.xml) | sed -z 's/\n/\r\n/g' > "$CSV";

# Regroup the flat CSV rows into municipality -> lists -> candidates.
# PublicatieGeslacht becomes a JSON boolean ("ja" -> true, anything else false).
c2j -d ';' < "$CSV" | jq -sc '
  group_by(.GemeenteCode,.GemeenteNaam)
  | map({
      GemeenteCode: (.[0].GemeenteCode|tostring),
      GemeenteNaam: .[0].GemeenteNaam,
      Lijsten: (
        map(del(.GemeenteCode,.GemeenteNaam))
        | group_by(.LijstNummer,.LijstNaam,.DocumentTaal,.PublicatieGeslacht)
        | map({
            LijstNummer: .[0].LijstNummer,
            LijstNaam: .[0].LijstNaam,
            DocumentTaal: .[0].DocumentTaal,
            PublicatieGeslacht: (.[0].PublicatieGeslacht=="ja"),
            Kandidaten: map(del(.LijstNummer,.LijstNaam,.DocumentTaal,.PublicatieGeslacht))
          })
      )
    })
' > "$JSON";
exit;
<?xml version="1.0"?>
<!--
  Text-output stylesheet that turns one EML candidate list into
  semicolon-separated rows, one row per candidate, each ending in a line feed.
  Columns per row: region code; region name; list number; list name;
  publication language; publish gender (ja/nee); candidate number; initials;
  first name; name prefix; last name; locality; country code; gender letter.
  No header row is written by this stylesheet.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:eml="urn:oasis:names:tc:evs:schema:eml" xmlns:kr="http://www.kiesraad.nl/extensions" xmlns:xNL="urn:oasis:names:tc:ciq:xsdschema:xNL:2.0" xmlns:xAL="urn:oasis:names:tc:ciq:xsdschema:xAL:2.0" xmlns:str="http://exslt.org/strings" xmlns:xalanredirect="org.apache.xalan.xslt.extensions.Redirect" version="1.0" extension-element-prefixes="str xalanredirect">
  <xsl:output omit-xml-declaration="yes" indent="no" method="text"/>
  <xsl:template match="/">
    <!-- Election domain id and name, repeated on every output row. -->
    <xsl:variable select="string(/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/@Id)" name="regionCode"/>
    <xsl:variable select="/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/text()" name="regionName"/>
    <xsl:for-each select="/eml:EML/eml:CandidateList/eml:Election/eml:Contest/eml:Affiliation">
      <xsl:variable select="number(eml:AffiliationIdentifier/@Id)" name="listId"/>
      <!--
        List name: the registered name when present, wrapped in double quotes
        with embedded quotes doubled if it contains a double quote. Lists
        without a registered name get "Blanco (last name, initials)" built
        from the candidate whose identifier is 1.
      -->
      <xsl:variable name="listName">
        <xsl:choose>
          <xsl:when test="eml:AffiliationIdentifier/eml:RegisteredName/text()">
            <xsl:choose>
              <xsl:when test="contains(eml:AffiliationIdentifier/eml:RegisteredName/text(),'&quot;')">
                <xsl:copy-of select="concat('&quot;',str:replace(eml:AffiliationIdentifier/eml:RegisteredName/text(),'&quot;','&quot;&quot;'),'&quot;')"/>
              </xsl:when>
              <xsl:otherwise>
                <xsl:copy-of select="eml:AffiliationIdentifier/eml:RegisteredName/text()"/>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:when>
          <xsl:otherwise>
            <xsl:variable select="./eml:Candidate[eml:CandidateIdentifier/@Id=1]/eml:CandidateFullName/xNL:PersonName" name="candidatePersonName"/>
            <!-- Last name, preceded by the name prefix and a space when a prefix exists. -->
            <xsl:variable name="candidateLastName">
              <xsl:choose>
                <xsl:when test="$candidatePersonName/xNL:NamePrefix">
                  <xsl:copy-of select="concat($candidatePersonName/xNL:NamePrefix/text(),&quot; &quot;,$candidatePersonName/xNL:LastName/text())"/>
                </xsl:when>
                <xsl:otherwise>
                  <xsl:copy-of select="$candidatePersonName/xNL:LastName/text()"/>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:variable>
            <xsl:copy-of select="concat(&quot;Blanco (&quot;,$candidateLastName,&quot;, &quot;,$candidatePersonName/xNL:NameLine[@NameType=&quot;Initials&quot;]/text(),&quot;)&quot;)"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <xsl:variable select="kr:ListData/@PublicationLanguage" name="publicationLanguage"/>
      <!-- "ja" only when PublishGender is exactly "true"; "nee" otherwise. -->
      <xsl:variable name="publishGender">
        <xsl:choose>
          <xsl:when test="kr:ListData[@PublishGender=&quot;true&quot;]">
            <xsl:text>ja</xsl:text>
          </xsl:when>
          <xsl:otherwise>
            <xsl:text>nee</xsl:text>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <!-- Columns shared by every candidate of this list, with a trailing separator. -->
      <xsl:variable select="concat($regionCode,&quot;;&quot;,$regionName,&quot;;&quot;,$listId,&quot;;&quot;,$listName,&quot;;&quot;,$publicationLanguage,&quot;;&quot;,$publishGender,&quot;;&quot;)" name="prefix"/>
      <xsl:for-each select="eml:Candidate">
        <xsl:variable select="eml:CandidateFullName/xNL:PersonName" name="candidatePersonName"/>
        <!--
          Country: first two characters of CountryNameCode, falling back to
          "NL" when no code is present. Gender: first letter, with f mapped to
          v and u mapped to a question mark.
          The country lookup must use the xAL prefix declared on
          xsl:stylesheet; prefixes are case-sensitive and a lowercase variant
          is not declared in this stylesheet.
        -->
        <xsl:copy-of select="concat($prefix,string(eml:CandidateIdentifier/@Id),&quot;;&quot;,$candidatePersonName/xNL:NameLine[@NameType=&quot;Initials&quot;]/text(),&quot;;&quot;,$candidatePersonName/xNL:FirstName/text(),&quot;;&quot;,$candidatePersonName/xNL:NamePrefix/text(),&quot;;&quot;,$candidatePersonName/xNL:LastName/text(),&quot;;&quot;,eml:QualifyingAddress//xAL:Locality/xAL:LocalityName/text(),&quot;;&quot;,substring(concat(.//xAL:CountryNameCode,&quot;NL&quot;),1,2),&quot;;&quot;,translate(substring(eml:Gender,1,1),&quot;fu&quot;,&quot;v?&quot;))"/>
        <!-- Row terminator (line feed). -->
        <xsl:value-of select="'&#10;'"/>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment