vlsi/Quiz

## init.sql
drop table z_orders;
drop table z_line_items;

create table z_orders as
  select rownum id, ora_hash(rownum, 10) order_type from xmltable('1 to 100000');

alter table z_orders add constraint id__z_orders primary key (id);

create table z_line_items as
  select ora_hash(rownum, 100000) order_id
       , case when ora_hash(rownum, 100)<10 then 42 else ora_hash(rownum, 100000) end item_id
       , ora_hash(rownum, 1000) price
    from xmltable('1 to 1000000');

create index item_id__z_line_items on z_line_items(item_id);

begin
  dbms_stats.gather_table_stats(user, 'Z_ORDERS', estimate_percent=>'100', no_invalidate=>false);
  dbms_stats.gather_table_stats(user, 'Z_LINE_ITEMS', estimate_percent=>'100', no_invalidate=>false);
end;
/

## Quiz
The goal is to make the following query perform an index range scan z_line_items on item_id column,
then broadcast the results to the slaves that would join z_orders table and check the order type.

Even though line_item scan is serial, the join of orders should go in parallel.

Oracle DB 11g is required. In Oracle 12c just adding parallel(n) is sufficient.

select sum(li.price)
  from z_line_items li
     , z_orders o
 where li.item_id = 42
   and o.id = li.order_id
   and o.order_type != 0;

Re-partitioning of orders/line_items tables is not a question. Suppose you are tuning the query
and the source tables are not partitioned.
Stick with a single SQL.
The obvoius solution of using a staging table does not qualify (even though it works).

The solution should be scalable in terms of being able to change the number of actively working
slaves by a matter of a hint change.

In other words, the plan should be like the following, but 6&7 should be performed in parallel for different line_items rows.

--------------------------------------------------------------------------------------------------------
| Id  | Operation                      | Name                  | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT               |                       |     1 |    22 |     2   (0)| 00:00:01 |
|   1 |  SORT AGGREGATE                |                       |     1 |    22 |            |          |
|   2 |   NESTED LOOPS                 |                       |       |       |            |          |
|   3 |    NESTED LOOPS                |                       |    10 |   220 |     2   (0)| 00:00:01 |
|   4 |     TABLE ACCESS BY INDEX ROWID| Z_LINE_ITEMS          |    10 |   140 |     1   (0)| 00:00:01 |
|*  5 |      INDEX RANGE SCAN          | ITEM_ID__Z_LINE_ITEMS |    10 |       |     1   (0)| 00:00:01 |
|*  6 |     INDEX UNIQUE SCAN          | ID__Z_ORDERS          |     1 |       |     1   (0)| 00:00:01 |
|*  7 |    TABLE ACCESS BY INDEX ROWID | Z_ORDERS              |     1 |     8 |     1   (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   5 - access("LI"."ITEM_ID"=42)
   6 - access("O"."ID"="LI"."ORDER_ID")
   7 - filter("O"."ORDER_TYPE"<>0)


## with_materialize.sql
Let us try "with materialize"

with v as(
   select /*+ materialize*/
       rowid rid
   from z_line_items li
   where li.item_id = 42
)
select *
  From (
select /*+ leading(v li o) parallel(v 8) */
       *
  from v
     , z_line_items li
     , z_orders o
 where li.rowid = v.rid
   and o.id = li.order_id
   and o.order_type != 0
) where rownum > 1; -- rownum > 1 is just to avoid long fetch

select * from V$PQ_TQSTAT;

DFO_NUMBER      TQ_ID SERVER_TYPE   NUM_ROWS      BYTES  OPEN_TIME AVG_LATENCY      WAITS   TIMEOUTS PROCESS   INSTANCE
---------- ---------- ----------- ---------- ---------- ---------- ----------- ---------- ---------- ------- ----------
         1          0 Producer         48143    2925030          0           0         13          0 P000             1
         1          0 Producer         42138    2560170          0           0         12          0 P001             1
         1          0 Consumer         90281    5485200          0           0        365         34 QC               1

Note how only two parallel slaves are involved, so the solution does not scale.
	drop table z_orders;
	drop table z_line_items;

	create table z_orders as
	select rownum id, ora_hash(rownum, 10) order_type from xmltable('1 to 100000');

	alter table z_orders add constraint id__z_orders primary key (id);

	create table z_line_items as
	select ora_hash(rownum, 100000) order_id
	, case when ora_hash(rownum, 100)<10 then 42 else ora_hash(rownum, 100000) end item_id
	, ora_hash(rownum, 1000) price
	from xmltable('1 to 1000000');

	create index item_id__z_line_items on z_line_items(item_id);

	begin
	dbms_stats.gather_table_stats(user, 'Z_ORDERS', estimate_percent=>'100', no_invalidate=>false);
	dbms_stats.gather_table_stats(user, 'Z_LINE_ITEMS', estimate_percent=>'100', no_invalidate=>false);
	end;
	/
	The goal is to make the following query perform an index range scan z_line_items on item_id column,
	then broadcast the results to the slaves that would join z_orders table and check the order type.

	Even though line_item scan is serial, the join of orders should go in parallel.

	Oracle DB 11g is required. In Oracle 12c just adding parallel(n) is sufficient.

	select sum(li.price)
	from z_line_items li
	, z_orders o
	where li.item_id = 42
	and o.id = li.order_id
	and o.order_type != 0;

	Re-partitioning of orders/line_items tables is not a question. Suppose you are tuning the query
	and the source tables are not partitioned.
	Stick with a single SQL.
	The obvoius solution of using a staging table does not qualify (even though it works).

	The solution should be scalable in terms of being able to change the number of actively working
	slaves by a matter of a hint change.

	In other words, the plan should be like the following, but 6&7 should be performed in parallel for different line_items rows.

	--------------------------------------------------------------------------------------------------------
	\| Id \| Operation \| Name \| Rows \| Bytes \| Cost (%CPU)\| Time \|
	--------------------------------------------------------------------------------------------------------
	\| 0 \| SELECT STATEMENT \| \| 1 \| 22 \| 2 (0)\| 00:00:01 \|
	\| 1 \| SORT AGGREGATE \| \| 1 \| 22 \| \| \|
	\| 2 \| NESTED LOOPS \| \| \| \| \| \|
	\| 3 \| NESTED LOOPS \| \| 10 \| 220 \| 2 (0)\| 00:00:01 \|
	\| 4 \| TABLE ACCESS BY INDEX ROWID\| Z_LINE_ITEMS \| 10 \| 140 \| 1 (0)\| 00:00:01 \|
	\|* 5 \| INDEX RANGE SCAN \| ITEM_ID__Z_LINE_ITEMS \| 10 \| \| 1 (0)\| 00:00:01 \|
	\|* 6 \| INDEX UNIQUE SCAN \| ID__Z_ORDERS \| 1 \| \| 1 (0)\| 00:00:01 \|
	\|* 7 \| TABLE ACCESS BY INDEX ROWID \| Z_ORDERS \| 1 \| 8 \| 1 (0)\| 00:00:01 \|
	--------------------------------------------------------------------------------------------------------

	Predicate Information (identified by operation id):
	---------------------------------------------------

	5 - access("LI"."ITEM_ID"=42)
	6 - access("O"."ID"="LI"."ORDER_ID")
	7 - filter("O"."ORDER_TYPE"<>0)
	Let us try "with materialize"

	with v as(
	select /+ materialize/
	rowid rid
	from z_line_items li
	where li.item_id = 42
	)
	select *
	From (
	select /+ leading(v li o) parallel(v 8) /
	*
	from v
	, z_line_items li
	, z_orders o
	where li.rowid = v.rid
	and o.id = li.order_id
	and o.order_type != 0
	) where rownum > 1; -- rownum > 1 is just to avoid long fetch

	select * from V$PQ_TQSTAT;

	DFO_NUMBER TQ_ID SERVER_TYPE NUM_ROWS BYTES OPEN_TIME AVG_LATENCY WAITS TIMEOUTS PROCESS INSTANCE
	---------- ---------- ----------- ---------- ---------- ---------- ----------- ---------- ---------- ------- ----------
	1 0 Producer 48143 2925030 0 0 13 0 P000 1
	1 0 Producer 42138 2560170 0 0 12 0 P001 1
	1 0 Consumer 90281 5485200 0 0 365 34 QC 1

	Note how only two parallel slaves are involved, so the solution does not scale.