Skip to content

Instantly share code, notes, and snippets.

@mgieseki
Last active April 6, 2016 14:29
Show Gist options
  • Save mgieseki/b53131f026a9c1f92abc8c977f5d69e2 to your computer and use it in GitHub Desktop.
Save mgieseki/b53131f026a9c1f92abc8c977f5d69e2 to your computer and use it in GitHub Desktop.
This XSLT script takes an SVG file created by `dvisvgm --no-merge --no-styles ...` and puts all adjacent characters with the same vertical position in a single tspan element.
<?xml version="1.0"?>
<!-- License: GPL v3 -->
<!-- (C) 2016 Martin Gieseking <martin.gieseking@uos.de> -->
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xlink='http://www.w3.org/1999/xlink'
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:svg='http://www.w3.org/2000/svg'
xmlns='http://www.w3.org/2000/svg'
xmlns:my="my-namespace"
exclude-result-prefixes="#all">
<!-- Serialize the result as indented XML. -->
<xsl:output indent="yes" method="xml"/>
<!-- Drop whitespace-only text nodes from all elements of the source document. -->
<xsl:strip-space elements="*"/>
<!-- Index of all SVG text elements, keyed by the string returned by my:text-location-id. -->
<xsl:key name="text-by-location" use="my:text-location-id(.)" match="svg:text"/>
<!--
  Insert a space between two characters if their distance is >= this threshold (in pt units).
  Declared as a stylesheet parameter so that the value can be overridden when the
  transformation is invoked; the default of 3 applies when no value is supplied.
-->
<xsl:param name="word-threshold" as="xs:double" select="3"/>
<!--
  Lookup table with one id element per distinct font-family/font-size combination used by
  the text elements of the source document. The entries are sorted by their key, and each
  one records the key, its position in that sorted order, the family and the size.
-->
<xsl:variable name="font-ids">
  <xsl:for-each-group select="//svg:text" group-by="concat(@font-family,@font-size)">
    <xsl:sort select="current-grouping-key()"/>
    <id>
      <xsl:attribute name="key" select="current-grouping-key()"/>
      <xsl:attribute name="pos" select="position()"/>
      <!-- family and size are read from the first text element of the group -->
      <xsl:attribute name="family" select="@font-family"/>
      <xsl:attribute name="size" select="@font-size"/>
    </id>
  </xsl:for-each-group>
</xsl:variable>
<!--
  Groups whose id starts with 'page': the group is re-created with its id only, a style
  element holding one CSS rule per entry of $font-ids is placed in front of the content,
  and the children are then processed by the other templates.
-->
<xsl:template match="svg:g[starts-with(@id, 'page')]">
  <g id="{@id}">
    <style type="text/css">
      <xsl:text>&#10;</xsl:text>
      <!-- each rule has the form .fN{font-family:...;font-size:...px} followed by a newline -->
      <xsl:value-of separator=""
                    select="for $font in $font-ids/*
                            return concat('.f', $font/@pos, '{font-family:', $font/@family, ';font-size:', $font/@size, 'px}&#10;')"/>
    </style>
    <xsl:apply-templates/>
  </g>
</xsl:template>
<!--
  Merges a run of text elements that share a location id into a single text element.
  Only the first element of a run produces output; the remaining members of the run
  produce nothing because their characters are already part of the tspan created for
  the first one. The font-* attributes are dropped and replaced by a class attribute
  that names the font of the first element.
-->
<xsl:template match="svg:text">
  <xsl:variable name="node-id" select="my:text-location-id(.)"/>
  <xsl:variable name="parent" select=".."/>
  <!--
    The key index covers the whole document, whereas the location id is built from the
    preceding siblings only. Text elements in different parent elements can therefore
    share an id, so the run is restricted to the elements with the same parent.
  -->
  <xsl:variable name="group" select="key('text-by-location', $node-id)[.. is $parent]"/>
  <xsl:if test=". is $group[1]">
    <text class="{my:font-id(.)}">
      <xsl:copy-of select="@*[not(starts-with(name(), 'font-'))]"/>
      <xsl:copy-of select="my:tspan($group)"/>
    </text>
  </xsl:if>
</xsl:template>
<!--
  Copies a font definition and appends a glyph for the space character that has no
  outline and zero advance in both directions.
  NOTE(review): presumably this keeps the spaces inserted by my:tspan from shifting the
  characters that follow them; confirm against the intended rendering.
-->
<xsl:template match="svg:font">
  <xsl:copy>
    <xsl:apply-templates select="@*|*"/>
    <!-- the SVG attribute name is horiz-adv-x, the same name my:xadvance reads -->
    <glyph d="" unicode=" " horiz-adv-x="0" vert-adv-y="0"/>
  </xsl:copy>
</xsl:template>
<!--
  Fallback for every attribute and node that no other template matches: shallow-copy it
  and continue with its attributes and children.
-->
<xsl:template match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!--
  Builds a single tspan element from a sequence of text elements.

  Parameter text: one or more text elements, in the order in which their characters
  are to appear.

  Result: a tspan whose dx attribute lists one offset per emitted character. Characters
  whose font id differs from that of the first character are wrapped in a nested tspan
  carrying that font id as class; all other characters are emitted as plain text.
-->
<xsl:function name="my:tspan" as="element(svg:tspan)">
  <xsl:param name="text" as="element(svg:text)+"/>
  <!-- temporary tree with one char element (dx, font-id, character) per emitted character -->
  <xsl:variable name="chars">
    <xsl:for-each select="$text">
      <xsl:variable name="prevpos" select="position()-1"/>
      <!-- gap between the end of the previous character (its x plus its advance) and this x -->
      <xsl:variable name="dx" select="if ($prevpos=0) then 0 else @x -$text[$prevpos]/@x - my:xadvance($text[$prevpos])"/>
      <!--
        A wide gap becomes a space that uses the font of the previous character. The
        first character has no predecessor, so it never gets one, even if the threshold
        is zero or negative.
      -->
      <xsl:if test="$prevpos gt 0 and $dx >= $word-threshold">
        <char dx="0" font-id="{my:font-id($text[$prevpos])}" xml:space="preserve"> </char>
      </xsl:if>
      <!-- offsets with a magnitude of at most 0.1 are written as 0 -->
      <char dx="{format-number(if (abs($dx) > 0.1) then $dx else 0, '#.###')}" font-id="{my:font-id(.)}">
        <xsl:value-of select="."/>
      </char>
    </xsl:for-each>
  </xsl:variable>
  <xsl:variable name="base-font" select="$chars/*[1]/@font-id"/>
  <tspan dx="{$chars/*/@dx}">
    <xsl:for-each-group select="$chars/*" group-adjacent="@font-id">
      <xsl:choose>
        <xsl:when test="current-grouping-key() = $base-font">
          <xsl:value-of select="current-group()" separator=""/>
        </xsl:when>
        <xsl:otherwise>
          <tspan class="{current-grouping-key()}">
            <xsl:value-of select="current-group()" separator=""/>
          </tspan>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </tspan>
</xsl:function>
<!--
  Returns the horizontal advance value for the character present in the given text element.

  The font is looked up in the defs of an enclosing svg element by matching its id against
  the font-family of the text element, and the glyph by matching its unicode attribute
  against the content of the text element. The advance is scaled from font units to the
  font size of the text element. The result is 0 if no matching glyph is found, or if no
  advance or font size is available.
-->
<xsl:function name="my:xadvance" as="xs:double">
  <xsl:param name="text" as="element(svg:text)"/>
  <xsl:variable name="font" select="($text/ancestor::svg:svg/svg:defs/svg:font[@id=$text/@font-family])[1]"/>
  <!-- only the first match is used, so duplicate glyph entries cannot break the arithmetic -->
  <xsl:variable name="glyph" select="($font/svg:glyph[@unicode=$text])[1]"/>
  <!-- SVG fonts: a glyph without horiz-adv-x uses the value given on the font element -->
  <xsl:variable name="advance" select="($glyph/@horiz-adv-x, $font/@horiz-adv-x)[1]"/>
  <!-- SVG fonts: units-per-em is treated as 1000 when it is not specified -->
  <xsl:variable name="upem" select="($font/svg:font-face/@units-per-em, 1000)[1]"/>
  <xsl:variable name="size" select="$text/@font-size"/>
  <xsl:sequence select="if ($glyph and $advance and $size)
                        then xs:double($advance) div xs:double($upem) * xs:double($size)
                        else xs:double(0)"/>
</xsl:function>
<!--
  Returns an ID for a font family/size pair: the letter 'f' followed by the pos value of
  the $font-ids entry whose key equals the concatenated font-family and font-size of the
  given text element.
-->
<xsl:function name="my:font-id" as="xs:string">
  <xsl:param name="text" as="element(svg:text)"/>
  <xsl:variable name="entry" select="$font-ids/*[@key=concat($text/@font-family, $text/@font-size)]"/>
  <xsl:sequence select="concat('f', $entry/@pos)"/>
</xsl:function>
<!--
  Returns a string identifying the relative location of a text element. The string is
  made of the y attribute, a hyphen, and the number of preceding siblings that are either
  not named 'text' or have a y attribute with a different value.
-->
<xsl:function name="my:text-location-id" as="xs:string">
  <xsl:param name="text" as="element(svg:text)"/>
  <xsl:variable name="baseline" select="$text/@y"/>
  <xsl:variable name="breaks" select="$text/preceding-sibling::*[local-name()!='text' or @y!=$baseline]"/>
  <xsl:sequence select="concat($baseline, '-', count($breaks))"/>
</xsl:function>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment