Skip to content

Instantly share code, notes, and snippets.

@tmuth
Created June 1, 2021 17:45
Show Gist options
  • Save tmuth/2b52152a72c6a348f9575e7fad546f9a to your computer and use it in GitHub Desktop.
Save tmuth/2b52152a72c6a348f9575e7fad546f9a to your computer and use it in GitHub Desktop.
select /*+ 2 buckets */ ora_hash(last_name,1) the_hash, last_name from customers where rownum <= 10;
THE_HASH LAST_NAME
---------- --------------------------------------------------
1 Brown
0 Riddell
0 Lawrence
1 Bartlett
1 Kenny
0 Williams
1 Jones
0 Blackwell
1 Regalado
0 Cardwell
10 rows selected
select /*+ 4 buckets */ ora_hash(last_name,3) the_hash, last_name from customers where rownum <= 10;
THE_HASH LAST_NAME
---------- --------------------------------------------------
1 Brown
0 Riddell
0 Lawrence
3 Bartlett
1 Kenny
0 Williams
1 Jones
2 Blackwell
3 Regalado
0 Cardwell
10 rows selected
-- 4 individual queries, 1 for each bucket. The idea is that today you would create 4 inputs in DBX, 1 for each query to
-- do manual parallelism
select * from customers where ora_hash(last_name,3) = 0;
select * from customers where ora_hash(last_name,3) = 1;
select * from customers where ora_hash(last_name,3) = 2;
select * from customers where ora_hash(last_name,3) = 3;
-- results of queries above not shown
-- verify the distribution of rows in the buckets
select count(*) from customers;
COUNT(*)
----------
9161
select count(*) cnt,ora_hash(last_name,3) hash_num from customers group by rollup(ora_hash(last_name,3));
CNT HASH_NUM
---------- ----------
2437 0
2433 1
2295 2
1996 3
9161
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment