Last active
April 6, 2016 14:29
-
-
Save mgieseki/b53131f026a9c1f92abc8c977f5d69e2 to your computer and use it in GitHub Desktop.
This XSLT script takes an SVG file created by `dvisvgm --no-merge --no-styles ...` and puts all adjacent characters with the same vertical position in a single tspan element.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<!-- License: GPL v3 --> | |
<!-- (C) 2016 Martin Gieseking <martin.gieseking@uos.de> --> | |
<xsl:stylesheet version="2.0" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:xlink='http://www.w3.org/1999/xlink' | |
xmlns:xs="http://www.w3.org/2001/XMLSchema" | |
xmlns:svg='http://www.w3.org/2000/svg' | |
xmlns='http://www.w3.org/2000/svg' | |
xmlns:my="my-namespace" | |
exclude-result-prefixes="#all"> | |
<!-- Serialize the result tree as indented XML. -->
<xsl:output indent="yes" method="xml"/>

<!-- Strip whitespace-only text nodes from all elements of the input document. -->
<xsl:strip-space elements="*"/>

<!-- Index of the svg:text elements, keyed by the location ID that
     my:text-location-id computes for each of them. -->
<xsl:key name="text-by-location"
         match="svg:text"
         use="my:text-location-id(.)"/>
<!-- Insert a space between two characters if their distance is >= this threshold (in pt units).
     Declared as a stylesheet parameter so that the caller of the transformation can
     override it; the default of 3 is the value that was previously hard-coded. -->
<xsl:param name="word-threshold" as="xs:double" select="3"/>
<!-- Temporary tree with one <id> element per distinct font-family/font-size
     combination found on the svg:text elements of the input document.
       key    = concatenation of font-family and font-size (the grouping key)
       pos    = position of the group after sorting the groups by that key
       family = font-family of the first text element of the group
       size   = font-size of the first text element of the group -->
<xsl:variable name="font-ids">
  <xsl:for-each-group select="//svg:text" group-by="concat(@font-family,@font-size)">
    <xsl:sort select="current-grouping-key()"/>
    <id>
      <xsl:attribute name="key" select="current-grouping-key()"/>
      <xsl:attribute name="pos" select="position()"/>
      <xsl:attribute name="family" select="@font-family"/>
      <xsl:attribute name="size" select="@font-size"/>
    </id>
  </xsl:for-each-group>
</xsl:variable>
<!-- Rebuilds every group whose id starts with 'page': the group is re-created,
     a CSS style element with one class per entry of $font-ids is inserted as
     its first child, and the original children are processed after it. -->
<xsl:template match="svg:g[starts-with(@id, 'page')]">
  <g>
    <!-- keep every attribute of the page group (id, transform, ...) instead of
         only the id, so that nothing attached to the group is lost -->
    <xsl:copy-of select="@*"/>
    <!-- create CSS style definitions for the fonts -->
    <style type="text/css">
      <xsl:text> </xsl:text>
      <xsl:for-each select="$font-ids/*">
        <xsl:value-of select="concat('.f', @pos, '{font-family:', @family, ';font-size:', @size, 'px} ')"/>
      </xsl:for-each>
    </style>
    <xsl:apply-templates/>
  </g>
</xsl:template>
<!-- Emits one text element per group of svg:text elements sharing a location ID.
     Only the first member of a group produces output; the other members are
     absorbed into the tspan built from the whole group and emit nothing. -->
<xsl:template match="svg:text">
  <!-- all text elements that have the same location ID as the current one -->
  <xsl:variable name="run" select="key('text-by-location', my:text-location-id(.))"/>
  <xsl:if test=". is $run[1]">
    <text class="{my:font-id(.)}">
      <!-- the font-* attributes are replaced by the CSS class set above -->
      <xsl:copy-of select="@*[not(starts-with(name(), 'font-'))]"/>
      <xsl:copy-of select="my:tspan($run)"/>
    </text>
  </xsl:if>
</xsl:template>
<!-- Copies an SVG font definition and appends a glyph for the space character.
     my:tspan inserts space characters between words and positions the following
     character through dx, so the space itself is given an advance of zero. -->
<xsl:template match="svg:font">
  <xsl:copy>
    <xsl:apply-templates select="@*|*"/>
    <!-- the SVG attribute name is horiz-adv-x (it was misspelled as hori-adv-x) -->
    <glyph d="" unicode=" " horiz-adv-x="0" vert-adv-y="0"/>
  </xsl:copy>
</xsl:template>
<!-- Identity transformation for everything not handled by a more specific rule:
     attributes are copied as they are ... -->
<xsl:template match="@*">
  <xsl:copy/>
</xsl:template>

<!-- ... and all other nodes are copied while their attributes and children
     are processed recursively. -->
<xsl:template match="node()">
  <xsl:copy>
    <xsl:apply-templates select="node() | @*"/>
  </xsl:copy>
</xsl:template>
<!-- Builds a single tspan element from a sequence of svg:text elements.
     The dx attribute of the result lists one offset per character. An offset is
     the horizontal gap between a character and its predecessor (difference of
     the x attributes minus the predecessor's advance); gaps with an absolute
     value of at most 0.1 are written as 0. If a gap reaches $word-threshold, a
     space character with offset 0 is inserted in front of the character.
     Characters whose font ID differs from that of the first character are
     wrapped in a nested tspan carrying the font ID as class. -->
<xsl:function name="my:tspan" as="element(svg:tspan)">
  <xsl:param name="text" as="element(svg:text)+"/>
  <!-- intermediate list: one <char> per character, including inserted spaces -->
  <xsl:variable name="chars">
    <xsl:for-each select="$text">
      <xsl:variable name="index-before" select="position() - 1"/>
      <xsl:variable name="predecessor" select="$text[$index-before]"/>
      <!-- the first character has no predecessor and therefore no gap -->
      <xsl:variable name="gap"
                    select="if ($index-before = 0) then 0
                            else @x - $predecessor/@x - my:xadvance($predecessor)"/>
      <xsl:if test="$gap &gt;= $word-threshold">
        <!-- the inserted space takes the font of the preceding character -->
        <char dx="0" font-id="{my:font-id($predecessor)}" xml:space="preserve"> </char>
      </xsl:if>
      <char dx="{format-number(if (abs($gap) > 0.1) then $gap else 0, '#.###')}" font-id="{my:font-id(.)}">
        <xsl:value-of select="."/>
      </char>
    </xsl:for-each>
  </xsl:variable>
  <!-- font ID of the first character; text in this font is not wrapped -->
  <xsl:variable name="lead-font" select="$chars/*[1]/@font-id"/>
  <tspan dx="{$chars/*/@dx}">
    <xsl:for-each-group select="$chars/*" group-adjacent="@font-id">
      <xsl:choose>
        <xsl:when test="@font-id != $lead-font">
          <tspan class="{@font-id}">
            <xsl:value-of select="current-group()" separator=""/>
          </tspan>
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="current-group()" separator=""/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </tspan>
</xsl:function>
<!-- Returns the horizontal advance value for the character present in the given text element.
     The glyph is looked up in the SVG font whose id equals the font-family of the
     text element; its advance (in font units) is divided by the units-per-em of
     the font and multiplied by the font-size of the text element.
     Returns 0 if the font does not contain a glyph for the character. -->
<xsl:function name="my:xadvance" as="xs:double">
  <xsl:param name="text" as="element(svg:text)"/>
  <!-- [1] guards against several matching fonts or glyphs, which would otherwise
       turn the arithmetic below into a type error -->
  <xsl:variable name="font" select="($text/ancestor::svg:svg/svg:defs/svg:font[@id=$text/@font-family])[1]"/>
  <xsl:variable name="glyph" select="($font/svg:glyph[@unicode=$text])[1]"/>
  <!-- SVG 1.1: a glyph without its own horiz-adv-x uses the horiz-adv-x of the
       font element, and a font-face without units-per-em uses 1000 -->
  <xsl:variable name="advance" select="($glyph/@horiz-adv-x, $font/@horiz-adv-x)[1]"/>
  <xsl:variable name="upem" select="($font/svg:font-face/@units-per-em, 1000)[1]"/>
  <xsl:sequence select="if (exists($glyph) and exists($advance))
                        then xs:double($advance) div xs:double($upem) * xs:double($text/@font-size)
                        else 0e0"/>
</xsl:function>
<!-- Returns the ID of the font family/size pair used by the given text element:
     the letter 'f' followed by the pos value of the matching $font-ids entry. -->
<xsl:function name="my:font-id" as="xs:string">
  <xsl:param name="text" as="element(svg:text)"/>
  <!-- entry of $font-ids whose key matches the family/size of the text element -->
  <xsl:variable name="entry" select="$font-ids/*[@key = concat($text/@font-family, $text/@font-size)]"/>
  <xsl:sequence select="concat('f', $entry/@pos)"/>
</xsl:function>
<!-- Returns a string identifying the relative location of a text element.
     The ID consists of
       - the generated ID of the parent node: the key built from this function
         spans the whole document, so text elements of different parents must
         not receive the same ID,
       - the y attribute of the text element,
       - the number of preceding siblings that are not text elements or that
         have a different y value.
     Adjacent sibling text elements with the same y value therefore share an ID. -->
<xsl:function name="my:text-location-id" as="xs:string">
  <xsl:param name="text" as="element(svg:text)"/>
  <xsl:variable name="y" select="$text/@y"/>
  <xsl:variable name="breaks" select="count($text/preceding-sibling::*[local-name()!='text' or @y!=$y])"/>
  <xsl:sequence select="concat(generate-id($text/..), '-', $y, '-', $breaks)"/>
</xsl:function>
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment