Last active
September 23, 2020 02:14
-
-
Save stIncMale/1db6608396edb35a1cc19894f430b5a2 to your computer and use it in GitHub Desktop.
The largest SQL I have written so far (PostgreSQL syntax)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with
new_event_tree_model_lob as (--the format of event_tree.model_lob is specified below
    select
        event_tree_id,
        string_agg(
            array_to_string(
                model_attr_array[:1] ||--a slice of the array with indices <=1, we need an array type here instead of an element of the array
                check_not_null(--if there is no matching file then model_lob refers to a file that does not exist, which means the data is corrupted
                    files.new_id,
                    array['files for event_tree', event_tree_id::text, 'model_idx', model_idx::text] || 'model_attr_array'::text || model_attr_array)::text ||
                model_attr_array[3] ||
                check_not_null(
                    stream_function.new_id,
                    array['stream_function for event_tree', event_tree_id::text, 'model_idx', model_idx::text] || 'model_attr_array'::text || model_attr_array)::text ||
                check_not_null(
                    function_metrics.new_id,
                    array['function_metrics for event_tree', event_tree_id::text, 'model_idx', model_idx::text] || 'model_attr_array'::text || model_attr_array)::text,
                ','
            ),
            ';' order by model_idx
        ) as new_model_lob
    from
        (select id as event_tree_id, model_idx, string_to_array(model, ',') as model_attr_array
         from
             (values (1001, '0,111,1,113,114;0,121,1,123,124'), (1002, '0,211,1,213,214')) as event_tree (id, model_lob)
             cross join unnest(string_to_array(model_lob, ';')) with ordinality as model (model, model_idx)--a table containing each model from model_lob in its own row
        ) as parsed_model_lob--a table containing an array of attributes representing each model from model_lob in its own row
        --we use left join even though we must always find a corresponding row in the right-side table, so that check_not_null can verify this requirement
        left join (values (111, -111), (121, -121), (211, -211)) as files (id, new_id) on model_attr_array[2]::bigint = files.id
        left join (values (113, -113), (123, -123), (213, -213)) as stream_function (id, new_id) on model_attr_array[4]::bigint = stream_function.id
        left join (values (114, -114), (124, -124), (214, -214)) as function_metrics (id, new_id) on model_attr_array[5]::bigint = function_metrics.id
    group by event_tree_id
),
new_event_tree_event_lob as (--the format of event_tree.event_lob is specified below
    with
    parsed_event_lob_parsed_new_attr11 as (--same as parsed_event_lob_parsed_attr11 but with updated value of 11th attr
        select
            event_tree_id,
            event_idx,
            /* There is only a single row for each event_tree_id and event_idx
             * that has a non-null event_attr_array. We are using the max aggregate function to extract that non-null value instead of
             * having the value in all rows and grouping by not only event_tree_id and event_idx
             * but also event_attr_array, which would have been more resource-consuming.
             *
             * max is an immutable function, so PostgreSQL can (and most likely does) calculate the result only once per selected row,
             * thus we should not worry about "calling" the function multiple times per row with the same arguments.*/
            case when (max(event_attr_array)::text[])[1] = 'S'
                then (max(event_attr_array)::text[])[:10] || string_agg(new_element, '&' order by elmnt_idx) || (max(event_attr_array)::text[])[12:]
                else max(event_attr_array)
            end event_attr_array_new_attr11
        from
            (with
            parsed_event_lob_parsed_attr11 as (
                select
                    event_tree_id, event_idx,
                    /* Select the array only for the first element.
                     * This allows us to group by event_idx instead of grouping by both event_idx and event_attr_array_no_attr11, which reduces costs
                     * and also potentially reduces memory / temporary storage consumption.*/
                    case
                        when elmnt_idx = 1 then event_attr_array
                        else null
                    end as event_attr_array,
                    elmnt_idx,
                    string_to_array(elmnt, ':') as element_value_array
                from
                    (select
                        event_tree_id,
                        event_idx,
                        event_attr_array,
                        string_to_array(
                            case
                                when event_attr_array[1] = 'S' then
                                    case
                                        when event_attr_array[11] = '' then 'null_attr11'--'null_attr11' is a special dummy value for an empty 11th attr to make sure we are not losing it
                                        else event_attr_array[11]
                                    end
                                /* Have to use a string and later specify it as a null-string in string_to_array
                                 * because otherwise string_to_array produces an array with no elements, which when unnested results in a table with no rows,
                                 * and cross join (which is also used when no join type is specified) with such a table also produces no rows, which leads to us losing data.*/
                                else 'null'
                            end,
                            '&', 'null'--specify the null-string as mentioned above
                        ) as event_attr11_element_array
                    from
                        (select id as event_tree_id, event_idx, string_to_array(event, ',') as event_attr_array
                         from
                             (values (1001, 'C,111,112,0~0,0,-1,0,113,-1,0;P,111,112,0~0,0,-1,114,0,0,0,0'), (1002, 'G,211,212,0~0,0,-1;S,221,222,0~0,0,-1,0,-1,-1,0,,-;S,221,222,0~0,0,-1,0,-1,-1,0,com/File1.java:123:com.Cls.function1:223&com/File2.java:231:com.Cls.function2:224&com/File3.java:312:com.Cls.function3:225&/java/lang/Thread.java:-1:java.lang.Thread.run:-1,-'), (1003, 'R,211,212,0~0,0,-1,-')) as event_tree (id, event_lob)
                             cross join unnest(string_to_array(event_lob, ';')) with ordinality as event (event, event_idx)--a table containing each event from event_lob in its own row
                         where event_tree.id != 1--event tree 1 is an invalid dummy entry
                        ) as parsed_event_lob--a table containing an array of attributes representing each event from event_lob in its own row
                    ) as parsed_event_lob_preparsed_attr11--same as parsed_event_lob but with the 11th attr extracted into a separate column and parsed into an array of elements
                    cross join unnest(event_attr11_element_array) with ordinality as elmnt (elmnt, elmnt_idx)
            )
            select--a table with the updated element_value_array concatenated into a string
                event_tree_id,
                event_idx,
                event_attr_array,
                elmnt_idx,
                case
                    when element_value_array[1] = 'null_attr11' then ''
                    else array_to_string(
                        element_value_array[:3] ||
                        case
                            when element_value_array[4] != '-1'--file instance is supposed to be in DB
                                then default_if_null(--if there is no matching file instance then event_lob refers to a file instance that does not exist, which means the data may be corrupted; it turned out the data we process is sometimes corrupted
                                    file_instance.new_id, -1,
                                    array['file_instance for event_tree ', event_tree_id::text, 'event_idx', event_idx::text, 'elmnt_idx=', elmnt_idx::text] || 'element_value_array'::text || element_value_array)::text
                            else '-1'
                        end,
                        ':')
                end as new_element
            from
                parsed_event_lob_parsed_attr11
                --we use left join even though we must always find a corresponding row in the right-side table, so that default_if_null can detect a violation
                left join (values (223, -223), (224, -224), (225, -225)) as file_instance (id, new_id) on element_value_array[4]::bigint = file_instance.id
            ) parsed_event_lob_parsed_new_attr11--same as parsed_event_lob_parsed_attr11 but with updated value of 11th attr
        group by event_tree_id, event_idx
    )
    select
        event_tree_id,
        string_agg(
            array_to_string(
                event_attr_array_new_attr11[:1] ||--a slice of the array with indices <=1, we need an array type here instead of an element of the array
                check_not_null(--if there is no matching event tag then event_lob refers to an event tag that does not exist, which means the data is corrupted
                    event_tag.new_id,
                    array['event_tag for event_tree', event_tree_id::text, 'event_idx', event_idx::text] || 'event_attr_array_new_attr11'::text || event_attr_array_new_attr11)::text ||
                check_not_null(
                    event_description.new_id,
                    array['event_description for event_tree', event_tree_id::text, 'event_idx', event_idx::text] || 'event_attr_array_new_attr11'::text || event_attr_array_new_attr11)::text ||
                case
                    when event_attr_array_new_attr11[1] = 'C' or event_attr_array_new_attr11[1] = 'S' then
                        event_attr_array_new_attr11[4:7] ||
                        case
                            when event_attr_array_new_attr11[8] != '-1'--file is supposed to be in DB
                                then check_not_null(
                                    files.new_id,
                                    array['files for event_tree', event_tree_id::text, 'event_idx', event_idx::text] || 'event_attr_array_new_attr11'::text || event_attr_array_new_attr11)::text
                            else '-1'
                        end ||
                        event_attr_array_new_attr11[9:12]
                    when event_attr_array_new_attr11[1] = 'P' then
                        event_attr_array_new_attr11[4:6] ||
                        case
                            when event_attr_array_new_attr11[7] != '-1'--path_condition is supposed to be in DB
                                then check_not_null(
                                    path_condition.new_id,
                                    array['path_condition for event_tree', event_tree_id::text, 'event_idx', event_idx::text] || 'event_attr_array_new_attr11'::text || event_attr_array_new_attr11)::text
                            else '-1'
                        end ||
                        event_attr_array_new_attr11[8:11]
                    else
                        event_attr_array_new_attr11[4:7]--it is [4:7] for 'R' and [4:6] for 'G', but array_to_string skips null (and therefore non-existing) array elements
                end,
                ','
            ),
            ';' order by event_idx
        ) as new_event_lob
    from
        parsed_event_lob_parsed_new_attr11
        --we use left join even though we must always find a corresponding row in the right-side table, so that check_not_null can verify this requirement
        left join (values (111, -111), (211, -211), (221, -221)) as event_tag (id, new_id) on event_attr_array_new_attr11[2]::bigint = event_tag.id
        left join (values (112, -112), (212, -212), (222, -222)) as event_description (id, new_id) on event_attr_array_new_attr11[3]::bigint = event_description.id
        left join (values (113, -113)) as files (id, new_id) on
            /* PostgreSQL does not have left-to-right short-circuiting of boolean operators (see https://www.postgresql.org/docs/10/sql-expressions.html),
             * so we have to check if the string represents an integer before casting it.*/
            (event_attr_array_new_attr11[1] = 'C' or event_attr_array_new_attr11[1] = 'S')
            and (case when event_attr_array_new_attr11[8] ~ '^-?\d+$' then event_attr_array_new_attr11[8]::bigint else null end) = files.id
        left join (values (114, -114)) as path_condition (id, new_id) on
            /* This is equivalent to
             * case when event_attr_array_new_attr11[1] = 'P'
             *     then event_attr_array_new_attr11[7]::bigint = path_condition.id
             *     else false
             * end
             * but the execution plan for the CASE variant is much worse. See the execution plans comparison below[1].*/
            event_attr_array_new_attr11[1] = 'P'
            and (case when event_attr_array_new_attr11[7] ~ '^-?\d+$' then event_attr_array_new_attr11[7]::bigint else null end) = path_condition.id
    group by event_tree_id
)
select
    event_tree_id,
    new_model_lob,
    new_event_lob,
    /* The PostgreSQL md5 function accepts a string instead of bytes, which means its behaviour depends on server_encoding
     * (use "show server_encoding;" to query it).
     * Since the application uses the UTF-8 charset for converting a string into bytes,
     * we can use the PostgreSQL md5 function only if server_encoding is UTF-8.
     * Fortunately, we require using UTF-8, so we are all good.*/
    md5(coalesce(new_model_lob, '') || '|' || coalesce(new_event_lob, '')) as events_md5
from
    new_event_tree_model_lob
    full join new_event_tree_event_lob using (event_tree_id);
-------------------------------------------------------------------------------------------------------------------------
--these functions are used by the query above
/* Returns v unchanged, or raises an exception if v is null.
 * Used to detect corrupted data: a null here means a "foreign key" embedded in a lob column
 * referenced a row that does not exist. debug_info is included in the error message to aid diagnosis.*/
create or replace function /*pg_temp.*/check_not_null(
    v bigint,
    debug_info text[])
returns bigint
language plpgsql immutable parallel safe
as $$
begin
    if v is null then
        raise exception 'The argument must not be null. Debug info: %', debug_info;
    end if;
    return v;
end;
$$;
/* Returns v, or dflt (with a warning) if v is null.
 * A softer variant of check_not_null for data that is known to sometimes be corrupted:
 * instead of failing the whole statement it substitutes a default and logs debug_info.
 * Note: plpgsql RAISE uses bare % as the placeholder; the original message used '%s',
 * which printed a stray 's' right after the substituted value (e.g. "-1s was used instead").*/
create or replace function /*pg_temp.*/default_if_null(
    v bigint,
    dflt bigint,
    debug_info text[])
returns bigint
language plpgsql immutable parallel safe
as $$
begin
    if v is null then
        raise warning 'The argument was null and % was used instead. Debug info: %', dflt, debug_info;
        return dflt;
    end if;
    return v;
end;
$$;
/*
 * event_tree.model_lob and event_tree.event_lob contain textual data which refers to rows in other tables.
 * It is similar to having FK constraints, but there is no DBMS support. The task is to update parts
 * that refer to rows in other tables with the new identifiers for those rows (the IDs are changed when data is imported).
 * Given that the structure of event_tree.event_lob is far from trivial, doing this in SQL is ugly and not easy,
 * but the alternative (download all the data piece by piece and process it in Java by making more requests to DBMS)
 * is likely not a viable solution performance-wise.
 *
 * The strings are parsed by
 * - splitting them into arrays;
 * - unnesting arrays into tables;
 * - updating elements of arrays (which are arrays on their own) that require updates;
 * - aggregating unnested array elements back into arrays;
 * - joining arrays back into strings.
 * Note that event_tree.model_lob effectively contains an array of arrays,
 * while event_tree.event_lob may contain an array of arrays of arrays (i.e. double nesting)
 * which makes the job of updating event_tree.event_lob much more difficult.
 *
 * Note that indices of PostgreSQL arrays are 1-based
 * and the DBMS allows them to go out of bounds of an array, in which case the value is considered null.
 *
 * Originally this was an update command, but it was changed to be a select and
 * real tables were replaced with fake ones produced by using values lists, so the query could be run immediately anywhere.
 * Despite being bulky, the original update command was able to process 400_000 rows under 30 seconds.
 *
 *
 * The format of event_tree.model_lob and event_tree.event_lob.
 * Legend:
 * [attrIdx] - the index of an attribute.
 * FK -> ck - the attribute is conceptually a foreign key that references the candidate key ck (which in our case always happens to be a primary key).
 *
 * event_tree.model_lob format:
 * models are separated by ';'
 * pieces of model data (attributes) are separated by ','
 * attributes:
 * [1] irrelevant
 * [2] FK -> files.id
 * [3] irrelevant
 * [4] FK -> stream_function.id
 * [5] FK -> function_metrics.id
 *
 * event_tree.event_lob format:
 * Attributes of events depend on the event type. Event types are hierarchical, e.g., event type "S"
 * is a subtype of "C". This means that "S" events have all the attributes of "C" events,
 * namely 0 - 9 (of which attributes 0-5 are present on events of any type), but additionally have attributes 10 and 11.
 * The attribute 10 is a set of stack trace elements, each of which itself has 4 attributes.
 * events are separated by ';'
 * pieces of event data (attributes) are separated by ','
 * attributes common for all event types:
 * [0] event type (one of C, S, P, R, G),
 * [1] FK -> event_tag.id,
 * [2] FK -> event_description.id,
 * [3] irrelevant
 * [4] irrelevant
 * [5] irrelevant
 * "C"-specific attributes:
 * [6] irrelevant
 * [7] FK -> files.id//can be -1 which means "no value"
 * [8] irrelevant
 * [9] irrelevant
 * "S"-specific attributes:
 * [10] elements
 * format:
 * elements are separated by '&'
 * pieces of element data (values) are separated by ':'
 * values:
 * [0] irrelevant
 * [1] irrelevant
 * [2] irrelevant
 * [3] FK -> file_instance.id//can be -1 which means "no value"
 * [11] irrelevant
 * "P"-specific attributes:
 * [6] FK -> path_condition.id//can be -1 which means "no value"
 * [7] irrelevant
 * [8] irrelevant
 * [9] irrelevant
 * [10] irrelevant
 * "R"-specific attributes:
 * [6] irrelevant
 * "G"-specific attributes:
 * none
 */
/*
 * [1] The execution plans comparison.
 *
 * -> Nested Loop Left Join (cost=4051.85..1183168.20 rows=21400000 width=80)
 *    Join Filter: CASE WHEN (parsed_event_lob_parsed_new_attr11.event_attr_array_new_attr11[1] = 'P'::text)
 *                      THEN ((parsed_event_lob_parsed_new_attr11.event_attr_array_new_attr11[7])::bigint = path_condition.id)
 *                      ELSE false
 *                 END
 *    -> Nested Loop Left Join (cost=4051.85..220144.82 rows=40000 width=72)
 *       ...<a plan for obtaining result_of_previous_operations is omitted>
 *    -> Materialize (cost=0.00..26.05 rows=1070 width=16)
 *       -> Seq Scan on path_condition (cost=0.00..20.70 rows=1070 width=16)
 * We can see that result_of_previous_operations is estimated to have 40000 rows and PG uses a nested loop to join it with 1070 rows from path_condition.
 * This results in about 40000 * 1070 / 2 = 21400000 comparisons (on average the inner loop will have 1070 / 2 iterations for each iteration of the outer loop).
 * So the CASE plan has time complexity O(rows(result_of_previous_operations) * rows(path_condition)), which is quadratic.
 *
 * -> Hash Left Join (cost=4085.92..220298.90 rows=40000 width=80)
 *    Hash Cond: (CASE WHEN (parsed_event_lob_parsed_new_attr11.event_attr_array_new_attr11[7] ~ '^-?\d+$'::text)
 *                     THEN (parsed_event_lob_parsed_new_attr11.event_attr_array_new_attr11[7])::bigint
 *                     ELSE NULL::bigint
 *                END = path_condition.id)
 *    Join Filter: (parsed_event_lob_parsed_new_attr11.event_attr_array_new_attr11[1] = 'P'::text)
 *    -> Nested Loop Left Join (cost=4051.85..220144.82 rows=40000 width=72)
 *       ...<a plan for obtaining result_of_previous_operations is omitted>
 *    -> Hash (cost=20.70..20.70 rows=1070 width=16)
 *       -> Seq Scan on path_condition (cost=0.00..20.70 rows=1070 width=16)
 * Now the plan is to build a hash table for 1070 rows in path_condition (which is of linear time complexity) and use a hash join to join this table with 40000 rows estimated for result_of_previous_operations.
 * This approach results in 40000 hash table lookups, compared to 21400000 comparisons in the original plan.
 * The time complexity for the new join plan is O(rows(result_of_previous_operations) + rows(path_condition)), i.e. linear instead of being quadratic.
 */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment