Skip to content

Instantly share code, notes, and snippets.

@niklasl
Created November 13, 2011 16:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niklasl/1362314 to your computer and use it in GitHub Desktop.
Save niklasl/1362314 to your computer and use it in GitHub Desktop.
Using XSLT to extract RDFa usage statistics
<!DOCTYPE xsl:stylesheet [
<!ENTITY tab "&#9;">
<!ENTITY lf "&#10;">
]>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:str="http://exslt.org/strings">
<!-- Label written in the first column of every row. Callers normally override
     it with the path of the document being analysed; the default is the word
     "stdin" wrapped in angle brackets. -->
<xsl:param name="base-url" select="'&lt;stdin&gt;'"/>
<!-- The column header line is written only when this equals 'yes'. -->
<xsl:param name="include-header" select="'yes'"/>
<!-- The result is plain text (tab-separated rows), not markup. -->
<xsl:output encoding="utf-8" method="text"/>
<!-- Entry point: writes the column header line when the include-header
     parameter is 'yes', then hands the document element to the element
     template. The column order here must stay in step with the row written
     by the element template. -->
<xsl:template match="/">
  <xsl:if test="$include-header = 'yes'">
    <!-- Columns describing where the row comes from and its RDFa context. -->
    <xsl:text>base-url&tab;tree-depth&tab;prefix&tab;vocab&tab;xmlns&tab;is-hanging&tab;</xsl:text>
    <!-- One column per inspected attribute. -->
    <xsl:text>about&tab;typeof&tab;rel&tab;rev&tab;href&tab;resource&tab;property&tab;datatype&tab;content&tab;lang&tab;</xsl:text>
    <!-- Last column, terminated by the line feed that ends the header. -->
    <xsl:text>child-count&lf;</xsl:text>
  </xsl:if>
  <xsl:apply-templates select="child::*"/>
</xsl:template>
<!-- Element template: writes one tab-separated statistics row for every
     element that carries at least one of the inspected attributes, then
     recurses into the child elements.
     Parameters:
       tree-depth  nesting level of the element; 0 for the document element,
                   incremented by one for each level of recursion.
       hanging     boolean handed down from the parent element and reported
                   in the is-hanging column.
     The columns are written in the same order as the header line produced by
     the root template. -->
<xsl:template match="*">
  <xsl:param name="tree-depth" select="0"/>
  <xsl:param name="hanging" select="false()"/>
  <!-- @prefix belongs to the gate like every other attribute that has its own
       column: without it an element that only declares prefixes would never
       produce a row and the prefix column would undercount. -->
  <xsl:if test="@prefix | @vocab | @about | @typeof | @rel | @rev | @href | @resource | @property | @datatype | @content | @lang | @xml:lang">
    <xsl:value-of select="$base-url"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="$tree-depth"/><xsl:text>&tab;</xsl:text>
    <!-- NOTE: counts prefix:uri pairs. The space after each colon is removed
         first so that every pair becomes a single whitespace-separated token. -->
    <xsl:value-of select="count(str:split(str:replace(normalize-space(@prefix), ': ', ':')))"/><xsl:text>&tab;</xsl:text>
    <xsl:call-template name="boolean-column">
      <xsl:with-param name="predicate" select="@vocab"/>
    </xsl:call-template>
    <!-- NOTE: counts namespaces in scope -->
    <xsl:value-of select="count(namespace::*)"/><xsl:text>&tab;</xsl:text>
    <xsl:call-template name="boolean-column">
      <xsl:with-param name="predicate" select="$hanging"/>
    </xsl:call-template>
    <!-- NOTE: counts whitespace-separated items; a missing attribute yields 0 -->
    <xsl:value-of select="count(str:split(normalize-space(@about)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@typeof)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@rel)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@rev)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@href)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@resource)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@property)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@datatype)))"/><xsl:text>&tab;</xsl:text>
    <xsl:value-of select="count(str:split(normalize-space(@content)))"/><xsl:text>&tab;</xsl:text>
    <!-- Either spelling of the language attribute sets the lang column. -->
    <xsl:call-template name="boolean-column">
      <xsl:with-param name="predicate" select="count(@lang | @xml:lang) > 0"/>
    </xsl:call-template>
    <!-- Counts every child node, not only child elements. -->
    <xsl:value-of select="count(node())"/><xsl:text>&lf;</xsl:text>
  </xsl:if>
  <!-- Children are flagged as hanging when this element has rel or rev but
       none of resource, href or typeof, or when this element was itself
       flagged and has none of resource, href, typeof or about. -->
  <xsl:apply-templates select="*">
    <xsl:with-param name="tree-depth" select="$tree-depth + 1"/>
    <xsl:with-param name="hanging" select="((@rel | @rev) and not(@resource | @href | @typeof) or
                                            $hanging and not (@resource | @href | @typeof | @about))"/>
  </xsl:apply-templates>
</xsl:template>
<!-- Fallback with priority -1: any node that reaches this template produces
     no output. -->
<xsl:template match="node()" priority="-1">
</xsl:template>
<!-- Named template: writes one boolean column, followed by a tab.
     Parameter:
       predicate  any XPath value; it is converted with the standard boolean
                  rules (a non-empty node-set counts as true) and written as
                  the word true or false. -->
<xsl:template name="boolean-column">
  <xsl:param name="predicate"/>
  <!-- The string form of an XPath boolean is exactly 'true' or 'false'. -->
  <xsl:value-of select="boolean($predicate)"/>
  <xsl:text>&tab;</xsl:text>
</xsl:template>
</xsl:stylesheet>
#!/bin/bash
# Runs rdfa-stats.xslt (expected next to this script) over every *.xhtml file
# in the given directory and writes the statistics rows to stdout.
#
# Usage: <this-script> <rdfa-testsuite-dir>
here=$(dirname "$0")
rdfa_testsuite=$1
if [ -z "$rdfa_testsuite" ]; then
    echo "Usage: $0 <rdfa-testsuite-dir>" >&2
    exit 1
fi
# Iterate with a quoted glob instead of parsing ls output, so paths that
# contain whitespace or glob characters are passed through intact.
for xhtml in "$rdfa_testsuite"/*.xhtml; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$xhtml" ] || continue
    # The header is suppressed on every run so the rows of all files can be
    # concatenated into a single table.
    xsltproc --html --novalid --stringparam base-url "$xhtml" --stringparam include-header no "$here/rdfa-stats.xslt" "$xhtml"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment