Skip to content

Instantly share code, notes, and snippets.

@LeKovr
Created June 2, 2019 07:58
Show Gist options
  • Save LeKovr/e7b365d2dca58e4bc8c8f4695e0ca435 to your computer and use it in GitHub Desktop.
Save LeKovr/e7b365d2dca58e4bc8c8f4695e0ca435 to your computer and use it in GitHub Desktop.
Unescape and pretty print xml in postgresql
create or replace function xml_pretty(xml)
returns xml as $$
-- requires xml2 pg extension
-- https://postgres.cz/wiki/PostgreSQL_SQL_Tricks#Pretty_xml_formating
select xslt_process($1::text,'
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:strip-space elements="*" />
<xsl:output method="xml" indent="yes" />
<xsl:template match="node() | @*">
<xsl:copy>
<xsl:apply-templates select="node() | @*" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>')::xml
$$ language sql immutable strict;
create or replace function to_dec(text)
returns integer as $$
-- https://postgres.cz/wiki/PostgreSQL_SQL_Tricks_II#Conversion_between_hex_and_dec_numbers
declare r int;
begin
execute E'select x\''||$1|| E'\'::integer' into r;
return r;
end
$$ language plpgsql immutable strict;
create or replace function xml_unescape(xml)
returns text as $$
-- convert escaped sybols like '&#x43E;' to unicode
-- sample: select xml_unescape('&#x43E;&#x43F;&#x43B;&#x44F;&#x44F;'::xml) = 'опляя';
declare
s text;
rv text := $1;
begin
for s in select distinct unnest(regexp_matches($1::text,'&#x(\w+);','g')) loop
rv := replace(rv,'&#x'||s||';',chr(to_dec(s)));
end loop;
return rv;
end
$$ language plpgsql immutable strict;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment