Skip to content

Instantly share code, notes, and snippets.

@Aminadav
Created May 12, 2016 20:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Aminadav/33d90cb99c26298c48f670b8ffac39c3 to your computer and use it in GitHub Desktop.
Save Aminadav/33d90cb99c26298c48f670b8ffac39c3 to your computer and use it in GitHub Desktop.
-- test by do
-- select extract_json_value('{"a":["a","2"]}','(/a)')
-- common schema
delimiter //
drop function if exists json_to_xml//
create function json_to_xml(
json_text text charset utf8
) returns text charset utf8
comment 'Transforms JSON to XML'
language SQL
deterministic
modifies sql data
sql security invoker
begin
declare v_from, v_old_from int unsigned;
declare v_token text;
declare v_level int;
declare v_state, expect_state varchar(255);
declare _json_tokens_id int unsigned default 0;
declare is_lvalue, is_rvalue tinyint unsigned;
declare scope_stack text charset ascii;
declare xml text charset utf8;
declare xml_nodes, xml_node text charset utf8;
set json_text := trim_wspace(json_text);
set expect_state := 'object_begin';
set is_lvalue := true;
set is_rvalue := false;
set scope_stack := '';
set xml_nodes := '';
set xml_node := '';
set xml := '';
get_token_loop: repeat
set v_old_from = v_from;
call _get_json_token(json_text, v_from, v_level, v_token, 1, v_state);
set _json_tokens_id := _json_tokens_id + 1;
if v_state = 'whitespace' then
iterate get_token_loop;
end if;
if v_level < 0 then
return null;
-- call throw('Negative nesting level found in _get_json_tokens');
end if;
if v_state = 'start' and scope_stack = '' then
leave get_token_loop;
end if;
if FIND_IN_SET(v_state, expect_state) = 0 then
return null;
-- call throw(CONCAT('Expected ', expect_state, '. Got ', v_state));
end if;
if v_state = 'array_end' and left(scope_stack, 1) = 'o' then
return null;
-- call throw(CONCAT('Missing "}". Found ', v_state));
end if;
if v_state = 'object_end' and left(scope_stack, 1) = 'a' then
return null;
-- call throw(CONCAT('Missing "]". Found ', v_state));
end if;
if v_state = 'alpha' and lower(v_token) not in ('true', 'false', 'null') then
return null;
-- call throw(CONCAT('Unsupported literal: ', v_token));
end if;
set is_rvalue := false;
case
when v_state = 'object_begin' then set expect_state := 'string', scope_stack := concat('o', scope_stack), is_lvalue := true;
when v_state = 'array_begin' then set expect_state := 'string,object_begin', scope_stack := concat('a', scope_stack), is_lvalue := false;
when v_state = 'string' and is_lvalue then set expect_state := 'colon', xml_node := v_token;
when v_state = 'colon' then set expect_state := 'string,number,alpha,object_begin,array_begin', is_lvalue := false;
when FIND_IN_SET(v_state, 'string,number,alpha') and not is_lvalue then set expect_state := 'comma,object_end,array_end', is_rvalue := true;
when v_state = 'object_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
when v_state = 'array_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2);
when v_state = 'comma' and left(scope_stack, 1) = 'o' then set expect_state := 'string', is_lvalue := true;
when v_state = 'comma' and left(scope_stack, 1) = 'a' then set expect_state := 'string,object_begin', is_lvalue := false;
end case;
set xml_node := unquote(xml_node);
if v_state = 'object_begin' then
if substring_index(xml_nodes, ',', 1) != '' then
set xml := concat(xml, '<', substring_index(xml_nodes, ',', 1), '>');
end if;
set xml_nodes := concat(',', xml_nodes);
end if;
if v_state = 'string' and is_lvalue then
if left(xml_nodes, 1) = ',' then
set xml_nodes := concat(xml_node, xml_nodes);
else
set xml_nodes := concat(xml_node, substring(xml_nodes, locate(',', xml_nodes)));
end if;
end if;
if is_rvalue then
set xml := concat(xml, '<', xml_node, '>', encode_xml(unquote(v_token)), '</', xml_node, '>');
end if;
if v_state = 'object_end' then
set xml_nodes := substring(xml_nodes, locate(',', xml_nodes) + 1);
if substring_index(xml_nodes, ',', 1) != '' then
set xml := concat(xml, '</', substring_index(xml_nodes, ',', 1), '>');
end if;
end if;
until
v_old_from = v_from
end repeat;
return xml;
end;
//
delimiter ;
--
--
--
delimiter //
drop function if exists extract_json_value//
create function extract_json_value(
json_text text charset utf8,
xpath text charset utf8
) returns text charset utf8
comment 'Extracts JSON value via XPath'
language SQL
deterministic
modifies sql data
sql security invoker
begin
return ExtractValue(json_to_xml(json_text), xpath);
end;
//
delimiter ;
--
-- Trim white space characters on both sides of text.
-- As opposed to the standard TRIM() function, which only trims
-- strict space characters (' '), trim_wspace() also trims new line,
-- tab and backspace characters
--
-- example:
--
-- SELECT trim_wspace('\n a b c \n ')
-- Returns: 'a b c'
--
DELIMITER $$
DROP FUNCTION IF EXISTS trim_wspace $$
CREATE FUNCTION trim_wspace(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Trim whitespace characters on both sides'
begin
declare len INT UNSIGNED DEFAULT 0;
declare done TINYINT UNSIGNED DEFAULT 0;
if txt IS NULL then
return txt;
end if;
while not done do
set len := CHAR_LENGTH(txt);
set txt = trim(' ' FROM txt);
set txt = trim('\r' FROM txt);
set txt = trim('\n' FROM txt);
set txt = trim('\t' FROM txt);
set txt = trim('\b' FROM txt);
if CHAR_LENGTH(txt) = len then
set done := 1;
end if;
end while;
return txt;
end $$
DELIMITER ;
delimiter //
set names utf8
//
drop procedure if exists _get_json_token;
//
create procedure _get_json_token(
in p_text text charset utf8
, inout p_from int unsigned
, inout p_level int
, out p_token text charset utf8
, in allow_script_tokens int
, inout p_state enum(
'alpha'
, 'alphanum'
, 'colon'
, 'comma'
, 'decimal'
, 'error'
, 'integer'
, 'number'
, 'minus'
, 'object_begin'
, 'object_end'
, 'array_begin'
, 'array_end'
, 'start'
, 'string'
, 'whitespace'
)
)
comment 'Reads a token according to lexical rules for JSON'
language SQL
deterministic
no sql
sql security invoker
begin
declare v_length int unsigned default character_length(p_text);
declare v_char, v_lookahead, v_quote_char varchar(1) charset utf8;
declare v_from int unsigned;
declare negative_number bool default false;
if p_from is null then
set p_from = 1;
end if;
if p_level is null then
set p_level = 0;
end if;
if p_state = 'object_end' then
set p_level = p_level - 1;
end if;
if p_state = 'array_end' and allow_script_tokens then
set p_level = p_level - 1;
end if;
set v_from = p_from;
set p_token = ''
, p_state = 'start';
my_loop: while v_from <= v_length do
set v_char = substr(p_text, v_from, 1)
, v_lookahead = substr(p_text, v_from+1, 1)
;
if v_char = '-' then
set negative_number := true, v_from = v_from + 1;
iterate my_loop;
end if;
state_case: begin case p_state
when 'error' then
set p_from = v_length;
leave state_case;
when 'start' then
case
when v_char = '-' then
set p_state = 'minus', v_from = v_from + 1;
when v_char between '0' and '9' then
set p_state = 'integer';
when v_char between 'A' and 'Z'
or v_char between 'a' and 'z'
or v_char = '_' then
set p_state = 'alpha';
when v_char = ' ' then
set p_state = 'whitespace'
, v_from = v_length - character_length(ltrim(substring(p_text, v_from)))
;
leave state_case;
when v_char in ('\t', '\n', '\r') then
set p_state = 'whitespace';
when v_char = '"' then
set p_state = 'string', v_quote_char = v_char;
when v_char = '.' then
if substr(p_text, v_from + 1, 1) between '0' and '9' then
set p_state = 'decimal', v_from = v_from + 1;
else
set p_state = 'error';
leave my_loop;
end if;
when v_char = ',' then
set p_state = 'comma', v_from = v_from + 1;
leave my_loop;
when v_char = ':' then
set p_state = 'colon', v_from = v_from + 1;
leave my_loop;
when v_char = '{' then
set p_state = 'object_begin', v_from = v_from + 1, p_level = p_level + 1;
leave my_loop;
when v_char = '}' then
set p_state = 'object_end', v_from = v_from + 1;
leave my_loop;
when v_char = '[' then
set p_state = 'array_begin', v_from = v_from + 1, p_level = p_level + 1;
leave my_loop;
when v_char = ']' then
set p_state = 'array_end', v_from = v_from + 1;
leave my_loop;
else
set p_state = 'error';
end case;
when 'alpha' then
case
when v_char between 'A' and 'Z'
or v_char between 'a' and 'z'
or v_char = '_' then
leave state_case;
when v_char between '0' and '9' then
set p_state = 'alphanum';
else
leave my_loop;
end case;
when 'alphanum' then
case
when v_char between 'A' and 'Z'
or v_char between 'a' and 'z'
or v_char = '_'
or v_char between '0' and '9' then
leave state_case;
else
leave my_loop;
end case;
when 'integer' then
case
when v_char between '0' and '9' then
leave state_case;
when v_char = '.' then
set p_state = 'decimal';
else
leave my_loop;
end case;
when 'decimal' then
case
when v_char between '0' and '9' then
leave state_case;
else
leave my_loop;
end case;
when 'whitespace' then
if v_char not in ('\t', '\n', '\r') then
leave my_loop;
end if;
when 'string' then
set v_from = locate(v_quote_char, p_text, v_from);
if v_from then
if substr(p_text, v_from + 1, 1) = v_quote_char then
set v_from = v_from + 1;
elseif substr(p_text, v_from - 1, 1) != '\\' then
set v_from = v_from + 1;
leave my_loop;
end if;
else
set p_state = 'error';
leave my_loop;
end if;
else
leave my_loop;
end case; end state_case;
set v_from = v_from + 1;
end while my_loop;
set p_token = substr(p_text, p_from, v_from - p_from) collate utf8_general_ci;
set p_from = v_from;
if p_state in ('decimal', 'integer') then
set p_state := 'number';
end if;
if p_state = 'alphanum' then
set p_state := 'alpha';
end if;
if negative_number and (p_state != 'number') then
set p_token := NULL;
end if;
end;
//
delimiter ;
--
-- Unquotes a given text.
-- Removes leading and trailing quoting characters (one of: "'/)
-- Unquoting works only if both leading and trailing character are identical.
-- There is no nesting or sub-unquoting.
--
-- example:
--
-- SELECT unquote('\"saying\"')
-- Returns: 'saying'
--
DELIMITER $$
DROP FUNCTION IF EXISTS unquote $$
CREATE FUNCTION unquote(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Unquotes a given text'
begin
declare quoting_char VARCHAR(1) CHARSET utf8;
declare terminating_quote_escape_char VARCHAR(1) CHARSET utf8;
declare current_pos INT UNSIGNED;
declare end_quote_pos INT UNSIGNED;
if CHAR_LENGTH(txt) < 2 then
return txt;
end if;
set quoting_char := LEFT(txt, 1);
if not quoting_char in ('''', '"', '`', '/') then
return txt;
end if;
if txt in ('''''', '""', '``', '//') then
return '';
end if;
set current_pos := 1;
terminating_quote_loop: while current_pos > 0 do
set current_pos := LOCATE(quoting_char, txt, current_pos + 1);
if current_pos = 0 then
-- No terminating quote
return txt;
end if;
if SUBSTRING(txt, current_pos, 2) = REPEAT(quoting_char, 2) then
set current_pos := current_pos + 1;
iterate terminating_quote_loop;
end if;
set terminating_quote_escape_char := SUBSTRING(txt, current_pos - 1, 1);
if (terminating_quote_escape_char = quoting_char) or (terminating_quote_escape_char = '\\') then
-- This isn't really a quote end: the quote is escaped.
-- We do nothing; just a trivial assignment.
iterate terminating_quote_loop;
end if;
-- Found terminating quote.
leave terminating_quote_loop;
end while;
if current_pos = CHAR_LENGTH(txt) then
return SUBSTRING(txt, 2, CHAR_LENGTH(txt) - 2);
end if;
return txt;
end $$
DELIMITER ;
--
-- Encode a given text for XML.
--
DELIMITER $$
DROP FUNCTION IF EXISTS encode_xml $$
CREATE FUNCTION encode_xml(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
COMMENT 'Encode (escape) given text for XML'
begin
set txt := REPLACE(txt, '&', '&amp;');
set txt := REPLACE(txt, '<', '&lt;');
set txt := REPLACE(txt, '>', '&gt;');
set txt := REPLACE(txt, '"', '&quot;');
set txt := REPLACE(txt, '''', '&apos;');
return txt;
end $$
DELIMITER ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment