Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Converting an NVARCHAR string to a UTF-8 VARBINARY data block in pure Transact-SQL
create function [dbo].[ToUTF8](@s nvarchar(max))
returns varbinary(max)
as
begin
declare @i int = 1, @n int = len(@s), @r varbinary(max) = 0x, @c int, @d varbinary(4)
while @i <= @n
begin
set @c = unicode(substring(@s, @i, 1))
if (@c & 0xfc00) = 0xd800
begin
set @i += 1
set @c = ((@c & 0x3ff) * 0x400) | 0x10000 | (unicode(substring(@s, @i, 1)) & 0x3ff)
end
if @c < 0x80
set @d = cast(@c as binary(1))
if @c >= 0x80 and @c < 0x800
set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xc080 as binary(2))
if @c >= 0x800 and @c < 0x10000
set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xe08080 as binary(3))
if @c >= 0x10000
set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0xFF00) | (@c & 0x3f) | 0xf0808080 as binary(4))
set @r += @d
set @i += 1
end
return @r
end
Owner

sevaa commented Nov 7, 2016

Unlike the GitHub comment system, it works correctly with characters from Unicode's astral planes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment