Aminadav/extract_json_value.sql

## extract_json_value.sql
-- test by do
-- select extract_json_value('{"a":["a","2"]}','(/a)')
-- common schema

delimiter //

drop function if exists json_to_xml//

create function json_to_xml(
    json_text text charset utf8
) returns text charset utf8
comment 'Transforms JSON to XML'
language SQL
deterministic
modifies sql data
sql security invoker
begin
    declare v_from, v_old_from int unsigned;
    declare v_token text;
    declare v_level int;
    declare v_state, expect_state varchar(255);
    declare _json_tokens_id int unsigned default 0;
    declare is_lvalue, is_rvalue tinyint unsigned;
    declare scope_stack text charset ascii;
    declare xml text charset utf8;
    declare xml_nodes, xml_node text charset utf8;

    set json_text := trim_wspace(json_text);

    set expect_state := 'object_begin';
    set is_lvalue := true;
    set is_rvalue := false;
    set scope_stack := '';
    set xml_nodes := '';
    set xml_node := '';
    set xml := '';
    get_token_loop: repeat
        set v_old_from = v_from;
        call _get_json_token(json_text, v_from, v_level, v_token, 1, v_state);
        set _json_tokens_id := _json_tokens_id + 1;
        if v_state = 'whitespace' then
          iterate get_token_loop;
        end if;
        if v_level < 0 then
          return null;
          -- call throw('Negative nesting level found in _get_json_tokens');
        end if;
        if v_state = 'start' and scope_stack = '' then
          leave get_token_loop;
        end if;
        if FIND_IN_SET(v_state, expect_state) = 0 then
          return null;
          -- call throw(CONCAT('Expected ', expect_state, '. Got ', v_state));
        end if;
        if v_state = 'array_end' and left(scope_stack, 1) = 'o' then
          return null;
          -- call throw(CONCAT('Missing "}". Found ', v_state));
        end if;
        if v_state = 'object_end' and left(scope_stack, 1) = 'a' then
          return null;
          -- call throw(CONCAT('Missing "]". Found ', v_state));
        end if;
        if v_state = 'alpha' and lower(v_token) not in ('true', 'false', 'null') then
          return null;
          -- call throw(CONCAT('Unsupported literal: ', v_token));
        end if;
        set is_rvalue := false;
        case
          when v_state = 'object_begin' then set expect_state := 'string', scope_stack := concat('o', scope_stack), is_lvalue := true;
          when v_state = 'array_begin' then set expect_state := 'string,object_begin', scope_stack := concat('a', scope_stack), is_lvalue := false;
          when v_state = 'string' and is_lvalue then set expect_state := 'colon', xml_node := v_token;
          when v_state = 'colon' then set expect_state := 'string,number,alpha,object_begin,array_begin', is_lvalue := false;
          when FIND_IN_SET(v_state, 'string,number,alpha') and not is_lvalue then set expect_state := 'comma,object_end,array_end', is_rvalue := true;
          when v_state = 'object_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
          when v_state = 'array_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
          when v_state = 'comma' and left(scope_stack, 1) = 'o' then set expect_state := 'string', is_lvalue := true;
          when v_state = 'comma' and left(scope_stack, 1) = 'a' then set expect_state := 'string,object_begin', is_lvalue := false;
        end case;
        set xml_node := unquote(xml_node);
        if v_state = 'object_begin' then
          if substring_index(xml_nodes, ',', 1) != '' then
            set xml := concat(xml, '<', substring_index(xml_nodes, ',', 1), '>');
          end if;
          set xml_nodes := concat(',', xml_nodes);
        end if;
        if v_state = 'string' and is_lvalue then
          if left(xml_nodes, 1) = ',' then
            set xml_nodes := concat(xml_node, xml_nodes);
          else
            set xml_nodes := concat(xml_node, substring(xml_nodes, locate(',', xml_nodes)));
          end if;
        end if;
        if is_rvalue then
          set xml := concat(xml, '<', xml_node, '>', encode_xml(unquote(v_token)), '</', xml_node, '>');
        end if;
        if v_state = 'object_end' then
          set xml_nodes := substring(xml_nodes, locate(',', xml_nodes) + 1);
          if substring_index(xml_nodes, ',', 1) != '' then
            set xml := concat(xml, '</', substring_index(xml_nodes, ',', 1), '>');
          end if;
        end if;
    until
        v_old_from = v_from
    end repeat;
    return xml;
end;
//

delimiter ;


--
--
--

delimiter //

drop function if exists extract_json_value//

create function extract_json_value(
    json_text text charset utf8,
    xpath text charset utf8
) returns text charset utf8
comment 'Extracts JSON value via XPath'
language SQL
deterministic
modifies sql data
sql security invoker
begin
  return ExtractValue(json_to_xml(json_text), xpath);
end;
//

delimiter ;


                   --
-- Trim white space characters on both sides of text.
-- As opposed to the standard TRIM() function, which only trims
-- strict space characters (' '), trim_wspace() also trims new line,
-- tab and backspace characters
--
-- example:
--
-- SELECT trim_wspace('\n a b c \n  ')
-- Returns: 'a b c'
--

DELIMITER $$

DROP FUNCTION IF EXISTS trim_wspace $$
CREATE FUNCTION trim_wspace(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Trim whitespace characters on both sides'

begin
  declare len INT UNSIGNED DEFAULT 0;
  declare done TINYINT UNSIGNED DEFAULT 0;
  if txt IS NULL then
    return txt;
  end if;
  while not done do
    set len := CHAR_LENGTH(txt);
    set txt = trim(' ' FROM txt);
    set txt = trim('\r' FROM txt);
    set txt = trim('\n' FROM txt);
    set txt = trim('\t' FROM txt);
    set txt = trim('\b' FROM txt);
    if CHAR_LENGTH(txt) = len then
      set done := 1;
    end if;
  end while;
  return txt;
end $$

DELIMITER ;


delimiter //

set names utf8
//

drop procedure if exists _get_json_token;
//

create procedure _get_json_token(
    in      p_text      text charset utf8
,   inout   p_from      int unsigned
,   inout   p_level     int
,   out     p_token     text charset utf8
,   in      allow_script_tokens int
,   inout   p_state     enum(
                            'alpha'
                        ,   'alphanum'
                        ,   'colon'
                        ,   'comma'
                        ,   'decimal'
                        ,   'error'
                        ,   'integer'
                        ,   'number'
                        ,   'minus'
                        ,   'object_begin'
                        ,   'object_end'
                        ,   'array_begin'
                        ,   'array_end'
                        ,   'start'
                        ,   'string'
                        ,   'whitespace'
                        )
)
comment 'Reads a token according to lexical rules for JSON'
language SQL
deterministic
no sql
sql security invoker
begin
    declare v_length int unsigned default character_length(p_text);
    declare v_char, v_lookahead, v_quote_char    varchar(1) charset utf8;
    declare v_from int unsigned;
    declare negative_number bool default false;

    if p_from is null then
        set p_from = 1;
    end if;
    if p_level is null then
        set p_level = 0;
    end if;
    if p_state = 'object_end' then
        set p_level = p_level - 1;
    end if;
    if p_state = 'array_end' and allow_script_tokens then
        set p_level = p_level - 1;
    end if;
    set v_from = p_from;

    set p_token = ''
    ,   p_state = 'start';
    my_loop: while v_from <= v_length do
        set v_char = substr(p_text, v_from, 1)
        ,   v_lookahead = substr(p_text, v_from+1, 1)
        ;
        if v_char = '-' then
            set negative_number := true, v_from = v_from + 1;
            iterate my_loop;
        end if;
        state_case: begin case p_state
            when 'error' then
                set p_from = v_length;
                leave state_case;
            when 'start' then
                case
                    when v_char = '-' then
                        set p_state = 'minus', v_from = v_from + 1;
                    when v_char between '0' and '9' then
                        set p_state = 'integer';
                    when v_char between 'A' and 'Z'
                    or   v_char between 'a' and 'z'
                    or   v_char = '_' then
                        set p_state = 'alpha';
                    when v_char = ' ' then
                        set p_state = 'whitespace'
                        ,   v_from = v_length - character_length(ltrim(substring(p_text, v_from)))
                        ;
                        leave state_case;
                    when v_char in ('\t', '\n', '\r') then
                        set p_state = 'whitespace';
                    when v_char = '"' then
                        set p_state = 'string', v_quote_char = v_char;
                    when v_char = '.' then
                        if substr(p_text, v_from + 1, 1) between '0' and '9' then
                            set p_state = 'decimal', v_from = v_from + 1;
                        else
                            set p_state = 'error';
                            leave my_loop;
                        end if;
                    when v_char = ',' then
                        set p_state = 'comma', v_from = v_from + 1;
                        leave my_loop;
                    when v_char = ':' then
                        set p_state = 'colon', v_from = v_from + 1;
                        leave my_loop;
                    when v_char = '{' then
                        set p_state = 'object_begin', v_from = v_from + 1, p_level = p_level + 1;
                        leave my_loop;
                    when v_char = '}' then
                        set p_state = 'object_end', v_from = v_from + 1;
                        leave my_loop;
                    when v_char = '[' then
                        set p_state = 'array_begin', v_from = v_from + 1, p_level = p_level + 1;
                        leave my_loop;
                    when v_char = ']' then
                        set p_state = 'array_end', v_from = v_from + 1;
                        leave my_loop;
                    else
                        set p_state = 'error';
                end case;
            when 'alpha' then
                case
                    when v_char between 'A' and 'Z'
                    or   v_char between 'a' and 'z'
                    or   v_char = '_' then
                        leave state_case;
                    when v_char between '0' and '9' then
                        set p_state = 'alphanum';
                    else
                        leave my_loop;
                end case;
            when 'alphanum' then
                case
                    when v_char between 'A' and 'Z'
                    or   v_char between 'a' and 'z'
                    or   v_char = '_'
                    or   v_char between '0' and '9' then
                        leave state_case;
                    else
                        leave my_loop;
                end case;
            when 'integer' then
                case
                    when v_char between '0' and '9' then
                        leave state_case;
                    when v_char = '.' then
                        set p_state = 'decimal';
                    else
                        leave my_loop;
                end case;
            when 'decimal' then
                case
                    when v_char between '0' and '9' then
                        leave state_case;
                    else
                        leave my_loop;
                end case;
            when 'whitespace' then
                if v_char not in ('\t', '\n', '\r') then
                    leave my_loop;
                end if;
            when 'string' then
                set v_from = locate(v_quote_char, p_text, v_from);
                if v_from then
                    if substr(p_text, v_from + 1, 1) = v_quote_char then
                        set v_from = v_from + 1;
                    elseif substr(p_text, v_from - 1, 1) != '\\' then
                        set v_from = v_from + 1;
                        leave my_loop;
                    end if;
                else
                    set p_state = 'error';
                    leave my_loop;
                end if;
            else
                leave my_loop;
        end case; end state_case;
        set v_from = v_from + 1;
    end while my_loop;
    set p_token = substr(p_text, p_from, v_from - p_from) collate utf8_general_ci;
    set p_from = v_from;
    if p_state in ('decimal', 'integer') then
      set p_state := 'number';
    end if;
    if p_state = 'alphanum' then
      set p_state := 'alpha';
    end if;
    if negative_number and (p_state != 'number') then
      set p_token := NULL;
    end if;
end;
//

delimiter ;


              --
-- Unquotes a given text.
-- Removes leading and trailing quoting characters (one of: "'/)
-- Unquoting works only if both leading and trailing character are identical.
-- There is no nesting or sub-unquoting.
--
-- example:
--
-- SELECT unquote('\"saying\"')
-- Returns: 'saying'
--

DELIMITER $$

DROP FUNCTION IF EXISTS unquote $$
CREATE FUNCTION unquote(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Unquotes a given text'

begin
  declare quoting_char VARCHAR(1) CHARSET utf8;
  declare terminating_quote_escape_char VARCHAR(1) CHARSET utf8;
  declare current_pos INT UNSIGNED;
  declare end_quote_pos INT UNSIGNED;

  if CHAR_LENGTH(txt) < 2 then
    return txt;
  end if;

  set quoting_char := LEFT(txt, 1);
  if not quoting_char in ('''', '"', '`', '/') then
    return txt;
  end if;
  if txt in ('''''', '""', '``', '//') then
    return '';
  end if;

  set current_pos := 1;
  terminating_quote_loop: while current_pos > 0 do
    set current_pos := LOCATE(quoting_char, txt, current_pos + 1);
    if current_pos = 0 then
      -- No terminating quote
      return txt;
    end if;
    if SUBSTRING(txt, current_pos, 2) = REPEAT(quoting_char, 2) then
      set current_pos := current_pos + 1;
      iterate terminating_quote_loop;
    end if;
    set terminating_quote_escape_char := SUBSTRING(txt, current_pos - 1, 1);
    if (terminating_quote_escape_char = quoting_char) or (terminating_quote_escape_char = '\\') then
      -- This isn't really a quote end: the quote is escaped.
      -- We do nothing; just a trivial assignment.
      iterate terminating_quote_loop;
    end if;
    -- Found terminating quote.
    leave terminating_quote_loop;
  end while;
  if current_pos = CHAR_LENGTH(txt) then
      return SUBSTRING(txt, 2, CHAR_LENGTH(txt) - 2);
    end if;
  return txt;
end $$

DELIMITER ;


--
-- Encode a given text for XML.
--

DELIMITER $$

DROP FUNCTION IF EXISTS encode_xml $$
CREATE FUNCTION encode_xml(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Encode (escape) given text for XML'

begin
  set txt := REPLACE(txt, '&', '&amp;');
  set txt := REPLACE(txt, '<', '&lt;');
  set txt := REPLACE(txt, '>', '&gt;');
  set txt := REPLACE(txt, '"', '&quot;');
  set txt := REPLACE(txt, '''', '&apos;');

  return txt;
end $$

DELIMITER ;
	-- test by do
	-- select extract_json_value('{"a":["a","2"]}','(/a)')
	-- common schema

	delimiter //

	drop function if exists json_to_xml//

	create function json_to_xml(
	json_text text charset utf8
	) returns text charset utf8
	comment 'Transforms JSON to XML'
	language SQL
	deterministic
	modifies sql data
	sql security invoker
	begin
	declare v_from, v_old_from int unsigned;
	declare v_token text;
	declare v_level int;
	declare v_state, expect_state varchar(255);
	declare _json_tokens_id int unsigned default 0;
	declare is_lvalue, is_rvalue tinyint unsigned;
	declare scope_stack text charset ascii;
	declare xml text charset utf8;
	declare xml_nodes, xml_node text charset utf8;

	set json_text := trim_wspace(json_text);

	set expect_state := 'object_begin';
	set is_lvalue := true;
	set is_rvalue := false;
	set scope_stack := '';
	set xml_nodes := '';
	set xml_node := '';
	set xml := '';
	get_token_loop: repeat
	set v_old_from = v_from;
	call _get_json_token(json_text, v_from, v_level, v_token, 1, v_state);
	set _json_tokens_id := _json_tokens_id + 1;
	if v_state = 'whitespace' then
	iterate get_token_loop;
	end if;
	if v_level < 0 then
	return null;
	-- call throw('Negative nesting level found in _get_json_tokens');
	end if;
	if v_state = 'start' and scope_stack = '' then
	leave get_token_loop;
	end if;
	if FIND_IN_SET(v_state, expect_state) = 0 then
	return null;
	-- call throw(CONCAT('Expected ', expect_state, '. Got ', v_state));
	end if;
	if v_state = 'array_end' and left(scope_stack, 1) = 'o' then
	return null;
	-- call throw(CONCAT('Missing "}". Found ', v_state));
	end if;
	if v_state = 'object_end' and left(scope_stack, 1) = 'a' then
	return null;
	-- call throw(CONCAT('Missing "]". Found ', v_state));
	end if;
	if v_state = 'alpha' and lower(v_token) not in ('true', 'false', 'null') then
	return null;
	-- call throw(CONCAT('Unsupported literal: ', v_token));
	end if;
	set is_rvalue := false;
	case
	when v_state = 'object_begin' then set expect_state := 'string', scope_stack := concat('o', scope_stack), is_lvalue := true;
	when v_state = 'array_begin' then set expect_state := 'string,object_begin', scope_stack := concat('a', scope_stack), is_lvalue := false;
	when v_state = 'string' and is_lvalue then set expect_state := 'colon', xml_node := v_token;
	when v_state = 'colon' then set expect_state := 'string,number,alpha,object_begin,array_begin', is_lvalue := false;
	when FIND_IN_SET(v_state, 'string,number,alpha') and not is_lvalue then set expect_state := 'comma,object_end,array_end', is_rvalue := true;
	when v_state = 'object_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
	when v_state = 'array_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
	when v_state = 'comma' and left(scope_stack, 1) = 'o' then set expect_state := 'string', is_lvalue := true;
	when v_state = 'comma' and left(scope_stack, 1) = 'a' then set expect_state := 'string,object_begin', is_lvalue := false;
	end case;
	set xml_node := unquote(xml_node);
	if v_state = 'object_begin' then
	if substring_index(xml_nodes, ',', 1) != '' then
	set xml := concat(xml, '<', substring_index(xml_nodes, ',', 1), '>');
	end if;
	set xml_nodes := concat(',', xml_nodes);
	end if;
	if v_state = 'string' and is_lvalue then
	if left(xml_nodes, 1) = ',' then
	set xml_nodes := concat(xml_node, xml_nodes);
	else
	set xml_nodes := concat(xml_node, substring(xml_nodes, locate(',', xml_nodes)));
	end if;
	end if;
	if is_rvalue then
	set xml := concat(xml, '<', xml_node, '>', encode_xml(unquote(v_token)), '</', xml_node, '>');
	end if;
	if v_state = 'object_end' then
	set xml_nodes := substring(xml_nodes, locate(',', xml_nodes) + 1);
	if substring_index(xml_nodes, ',', 1) != '' then
	set xml := concat(xml, '</', substring_index(xml_nodes, ',', 1), '>');
	end if;
	end if;
	until
	v_old_from = v_from
	end repeat;
	return xml;
	end;
	//

	delimiter ;



	--
	--
	--

	delimiter //

	drop function if exists extract_json_value//

	create function extract_json_value(
	json_text text charset utf8,
	xpath text charset utf8
	) returns text charset utf8
	comment 'Extracts JSON value via XPath'
	language SQL
	deterministic
	modifies sql data
	sql security invoker
	begin
	return ExtractValue(json_to_xml(json_text), xpath);
	end;
	//

	delimiter ;


	--
	-- Trim white space characters on both sides of text.
	-- As opposed to the standard TRIM() function, which only trims
	-- strict space characters (' '), trim_wspace() also trims new line,
	-- tab and backspace characters
	--
	-- example:
	--
	-- SELECT trim_wspace('\n a b c \n ')
	-- Returns: 'a b c'
	--

	DELIMITER $$

	DROP FUNCTION IF EXISTS trim_wspace $$
	CREATE FUNCTION trim_wspace(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
	DETERMINISTIC
	NO SQL
	SQL SECURITY INVOKER
	COMMENT 'Trim whitespace characters on both sides'

	begin
	declare len INT UNSIGNED DEFAULT 0;
	declare done TINYINT UNSIGNED DEFAULT 0;
	if txt IS NULL then
	return txt;
	end if;
	while not done do
	set len := CHAR_LENGTH(txt);
	set txt = trim(' ' FROM txt);
	set txt = trim('\r' FROM txt);
	set txt = trim('\n' FROM txt);
	set txt = trim('\t' FROM txt);
	set txt = trim('\b' FROM txt);
	if CHAR_LENGTH(txt) = len then
	set done := 1;
	end if;
	end while;
	return txt;
	end $$

	DELIMITER ;


	delimiter //

	set names utf8
	//

	drop procedure if exists _get_json_token;
	//

	create procedure _get_json_token(
	in p_text text charset utf8
	, inout p_from int unsigned
	, inout p_level int
	, out p_token text charset utf8
	, in allow_script_tokens int
	, inout p_state enum(
	'alpha'
	, 'alphanum'
	, 'colon'
	, 'comma'
	, 'decimal'
	, 'error'
	, 'integer'
	, 'number'
	, 'minus'
	, 'object_begin'
	, 'object_end'
	, 'array_begin'
	, 'array_end'
	, 'start'
	, 'string'
	, 'whitespace'
	)
	)
	comment 'Reads a token according to lexical rules for JSON'
	language SQL
	deterministic
	no sql
	sql security invoker
	begin
	declare v_length int unsigned default character_length(p_text);
	declare v_char, v_lookahead, v_quote_char varchar(1) charset utf8;
	declare v_from int unsigned;
	declare negative_number bool default false;

	if p_from is null then
	set p_from = 1;
	end if;
	if p_level is null then
	set p_level = 0;
	end if;
	if p_state = 'object_end' then
	set p_level = p_level - 1;
	end if;
	if p_state = 'array_end' and allow_script_tokens then
	set p_level = p_level - 1;
	end if;
	set v_from = p_from;

	set p_token = ''
	, p_state = 'start';
	my_loop: while v_from <= v_length do
	set v_char = substr(p_text, v_from, 1)
	, v_lookahead = substr(p_text, v_from+1, 1)
	;
	if v_char = '-' then
	set negative_number := true, v_from = v_from + 1;
	iterate my_loop;
	end if;
	state_case: begin case p_state
	when 'error' then
	set p_from = v_length;
	leave state_case;
	when 'start' then
	case
	when v_char = '-' then
	set p_state = 'minus', v_from = v_from + 1;
	when v_char between '0' and '9' then
	set p_state = 'integer';
	when v_char between 'A' and 'Z'
	or v_char between 'a' and 'z'
	or v_char = '_' then
	set p_state = 'alpha';
	when v_char = ' ' then
	set p_state = 'whitespace'
	, v_from = v_length - character_length(ltrim(substring(p_text, v_from)))
	;
	leave state_case;
	when v_char in ('\t', '\n', '\r') then
	set p_state = 'whitespace';
	when v_char = '"' then
	set p_state = 'string', v_quote_char = v_char;
	when v_char = '.' then
	if substr(p_text, v_from + 1, 1) between '0' and '9' then
	set p_state = 'decimal', v_from = v_from + 1;
	else
	set p_state = 'error';
	leave my_loop;
	end if;
	when v_char = ',' then
	set p_state = 'comma', v_from = v_from + 1;
	leave my_loop;
	when v_char = ':' then
	set p_state = 'colon', v_from = v_from + 1;
	leave my_loop;
	when v_char = '{' then
	set p_state = 'object_begin', v_from = v_from + 1, p_level = p_level + 1;
	leave my_loop;
	when v_char = '}' then
	set p_state = 'object_end', v_from = v_from + 1;
	leave my_loop;
	when v_char = '[' then
	set p_state = 'array_begin', v_from = v_from + 1, p_level = p_level + 1;
	leave my_loop;
	when v_char = ']' then
	set p_state = 'array_end', v_from = v_from + 1;
	leave my_loop;
	else
	set p_state = 'error';
	end case;
	when 'alpha' then
	case
	when v_char between 'A' and 'Z'
	or v_char between 'a' and 'z'
	or v_char = '_' then
	leave state_case;
	when v_char between '0' and '9' then
	set p_state = 'alphanum';
	else
	leave my_loop;
	end case;
	when 'alphanum' then
	case
	when v_char between 'A' and 'Z'
	or v_char between 'a' and 'z'
	or v_char = '_'
	or v_char between '0' and '9' then
	leave state_case;
	else
	leave my_loop;
	end case;
	when 'integer' then
	case
	when v_char between '0' and '9' then
	leave state_case;
	when v_char = '.' then
	set p_state = 'decimal';
	else
	leave my_loop;
	end case;
	when 'decimal' then
	case
	when v_char between '0' and '9' then
	leave state_case;
	else
	leave my_loop;
	end case;
	when 'whitespace' then
	if v_char not in ('\t', '\n', '\r') then
	leave my_loop;
	end if;
	when 'string' then
	set v_from = locate(v_quote_char, p_text, v_from);
	if v_from then
	if substr(p_text, v_from + 1, 1) = v_quote_char then
	set v_from = v_from + 1;
	elseif substr(p_text, v_from - 1, 1) != '\\' then
	set v_from = v_from + 1;
	leave my_loop;
	end if;
	else
	set p_state = 'error';
	leave my_loop;
	end if;
	else
	leave my_loop;
	end case; end state_case;
	set v_from = v_from + 1;
	end while my_loop;
	set p_token = substr(p_text, p_from, v_from - p_from) collate utf8_general_ci;
	set p_from = v_from;
	if p_state in ('decimal', 'integer') then
	set p_state := 'number';
	end if;
	if p_state = 'alphanum' then
	set p_state := 'alpha';
	end if;
	if negative_number and (p_state != 'number') then
	set p_token := NULL;
	end if;
	end;
	//

	delimiter ;


	--
	-- Unquotes a given text.
	-- Removes leading and trailing quoting characters (one of: "'/)
	-- Unquoting works only if both leading and trailing character are identical.
	-- There is no nesting or sub-unquoting.
	--
	-- example:
	--
	-- SELECT unquote('\"saying\"')
	-- Returns: 'saying'
	--

	DELIMITER $$

	DROP FUNCTION IF EXISTS unquote $$
	CREATE FUNCTION unquote(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
	DETERMINISTIC
	NO SQL
	SQL SECURITY INVOKER
	COMMENT 'Unquotes a given text'

	begin
	declare quoting_char VARCHAR(1) CHARSET utf8;
	declare terminating_quote_escape_char VARCHAR(1) CHARSET utf8;
	declare current_pos INT UNSIGNED;
	declare end_quote_pos INT UNSIGNED;

	if CHAR_LENGTH(txt) < 2 then
	return txt;
	end if;

	set quoting_char := LEFT(txt, 1);
	if not quoting_char in ('''', '"', '`', '/') then
	return txt;
	end if;
	if txt in ('''''', '""', '``', '//') then
	return '';
	end if;

	set current_pos := 1;
	terminating_quote_loop: while current_pos > 0 do
	set current_pos := LOCATE(quoting_char, txt, current_pos + 1);
	if current_pos = 0 then
	-- No terminating quote
	return txt;
	end if;
	if SUBSTRING(txt, current_pos, 2) = REPEAT(quoting_char, 2) then
	set current_pos := current_pos + 1;
	iterate terminating_quote_loop;
	end if;
	set terminating_quote_escape_char := SUBSTRING(txt, current_pos - 1, 1);
	if (terminating_quote_escape_char = quoting_char) or (terminating_quote_escape_char = '\\') then
	-- This isn't really a quote end: the quote is escaped.
	-- We do nothing; just a trivial assignment.
	iterate terminating_quote_loop;
	end if;
	-- Found terminating quote.
	leave terminating_quote_loop;
	end while;
	if current_pos = CHAR_LENGTH(txt) then
	return SUBSTRING(txt, 2, CHAR_LENGTH(txt) - 2);
	end if;
	return txt;
	end $$

	DELIMITER ;



	--
	-- Encode a given text for XML.
	--

	DELIMITER $$

	DROP FUNCTION IF EXISTS encode_xml $$
	CREATE FUNCTION encode_xml(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
	DETERMINISTIC
	NO SQL
	SQL SECURITY INVOKER
	COMMENT 'Encode (escape) given text for XML'

	begin
	set txt := REPLACE(txt, '&', '&');
	set txt := REPLACE(txt, '<', '<');
	set txt := REPLACE(txt, '>', '>');
	set txt := REPLACE(txt, '"', '"');
	set txt := REPLACE(txt, '''', ''');

	return txt;
	end $$

	DELIMITER ;