Skip to content

Instantly share code, notes, and snippets.

@ebruchez
Created September 27, 2011 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ebruchez/1245692 to your computer and use it in GitHub Desktop.
Save ebruchez/1245692 to your computer and use it in GitHub Desktop.
Unzipping an Excel 2007 .xlsx file and extracting data in XPL/XSLT
<!-- Unzip Excel file -->
<p:processor name="oxf:unzip">
<p:input name="data" href="#zip"/>
<p:output name="data" id="zip-file-list"/>
</p:processor>
<!-- Extract Excel file's first sheet data as a series of rows -->
<p:processor name="oxf:xslt">
<p:input name="data" href="#zip-file-list"/>
<p:input name="config">
<rows xsl:version="2.0"
xmlns:ms-main="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:ms-orels="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:ms-prels="http://schemas.openxmlformats.org/package/2006/relationships">
<!-- Fixed documents -->
<xsl:variable name="workbook" select="doc(/*/file[@name = 'xl/workbook.xml'])" as="document-node()"/>
<xsl:variable name="rels" select="doc(/*/file[@name = 'xl/_rels/workbook.xml.rels'])" as="document-node()"/>
<xsl:variable name="strings" select="doc(/*/file[@name = 'xl/sharedStrings.xml'])" as="document-node()"/>
<!-- Open first sheet -->
<xsl:variable name="sheet-rid" select="$workbook/*/ms-main:sheets/ms-main:sheet[1]/@ms-orels:id" as="xs:string"/>
<xsl:variable name="sheet-filename" select="$rels/*/ms-prels:Relationship[@Id = $sheet-rid]/@Target" as="xs:string"/>
<xsl:variable name="sheet" select="doc(/*/file[@name = concat('xl/', $sheet-filename)])" as="document-node()"/>
<xsl:for-each select="$sheet/*/ms-main:sheetData/ms-main:row">
<!-- Format row -->
<xsl:variable name="row" as="element(row)">
<row>
<xsl:for-each select="ms-main:c">
<xsl:variable name="v" select="ms-main:v"/>
<c><xsl:value-of select="if (exists(@t)) then $strings/*/ms-main:si[xs:integer($v) + 1]/ms-main:t else $v"/></c>
</xsl:for-each>
</row>
</xsl:variable>
<!-- Only output non-blank rows -->
<xsl:if test="exists($row/c[normalize-space()])">
<xsl:copy-of select="$row"/>
</xsl:if>
</xsl:for-each>
</rows>
</p:input>
<p:output name="data" id="rows"/>
</p:processor>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment