Skip to content

Instantly share code, notes, and snippets.

@nileema
nileema / gist:6677516
Created September 23, 2013 21:56
list bucketing, skewed tables
hive:di> create table test_nileema_skewed (c1 int, c2 int, c3 string) skewed by (c1) on (5) ;
OK
Time taken: 5.572 seconds
hive:di> desc formatted test_nileema_skewed;
OK
# col_name data_type comment
c1 int None
c2 int None
c3 string None
[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building presto-docs 0.144-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-checkstyle-plugin:2.17:check (default) @ presto-docs ---
[INFO] Starting audit...
Audit done.
[INFO]
presto:tiny> explain (type distributed)
select * from
(select orderkey + 1 orderkey from orders) o
join
(select orderkey + 1 orderkey from orders) o1
on (o.orderkey = o1.orderkey);
Query Plan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Fragment 0 [SINGLE]
== 8u74 ==
$ /usr/local/jdk1.8.0_74/bin/java -XX:+PrintCodeCache -XX:+UseG1GC -mx20g -ms20g -XX:ReservedCodeCacheSize=20m -jar jittest-1.0-SNAPSHOT-standalone.jar
Timestamp Compilation Time(ms) Code Cache Size(MB) Perm Gen/Metaspace Size(MB) Invocations/s
1456789633030 2146 6 5 41
1456789634035 5595 8 6 90
1456789635037 7494 9 6 121
1456789636038 9567 9 7 148
1456789637039 11747 9 7 174
1456789638040 14306 10 8 197
== Run with 8u25 ==
$ /usr/local/jdk-8u25-64/bin/java -XX:+PrintCodeCache -XX:+UseG1GC -mx20g -ms20g -XX:ReservedCodeCacheSize=20m -jar jittest-1.0-SNAPSHOT-standalone.jar
Timestamp Compilation Time Code Cache Size Perm Gen/Metaspace Size Invocations/s
1456786460321 1918 6.0 6.0 31.98
1456786461337 4864 8.0 6.0 62.12
1456786462338 6585 9.0 7.0 84.19
1456786463339 8108 9.0 7.0 101.52
1456786464340 10053 10.0 8.0 124.16
1456786465341 12062 10.0 8.0 140.01
1456786466341 14310 11.0 9.0 156.02
at com.facebook.presto.raptor.util.DatabaseUtil.metadataError(DatabaseUtil.java:67)
at com.facebook.presto.raptor.metadata.ShardIterator.computeNext(ShardIterator.java:101)
at com.facebook.presto.raptor.metadata.ShardIterator.computeNext(ShardIterator.java:48)
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143)
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138)
at com.facebook.presto.raptor.util.SynchronizedResultIterator.hasNext(SynchronizedResultIterator.java:41)
at com.facebook.presto.raptor.RaptorSplitManager$RaptorSplitSource.isFinished(RaptorSplitManager.java:179)
at com.facebook.presto.split.ConnectorAwareSplitSource.isFinished(ConnectorAwareSplitSource.java:59)
at com.facebook.presto.execution.scheduler.SourcePartitionedScheduler.schedule(SourcePartitionedScheduler.java:76)
at com.facebook.presto.execution.scheduler.SqlQueryScheduler.schedule(SqlQueryScheduler.java:304)
select *
FROM
(select orderkey, count(*) from orders group by orderkey) a
JOIN
(select orderkey, count(*) from lineitem group by orderkey) b
on a.orderkey = b.orderkey
Query Plan
---------------------------------------------------------------------------------------------------------------------------------------------
Fragment 0 [SINGLE]
presto:tiny> explain (type distributed)
select * from (select orderkey, mod(orderkey,3) from orders) o
join
(select orderkey, mod(orderkey, 3) from orders) o1
on (o.orderkey = o1.orderkey);
Query Plan
----------------------------------------------------------------------------------------------------------------------------------------
Fragment 0 [SINGLE]
Output layout: [orderkey, mod, orderkey_6, mod_17]
- Output[orderkey, len, orderkey, len] => [orderkey:bigint, mod:bigint, orderkey_6:bigint, mod_17:bigint]
== 8u45 ==
$ /usr/local/jdk-8u45-64/bin/java -XX:+PrintCodeCache -XX:+UseG1GC -mx20g -ms20g -XX:ReservedCodeCacheSize=20m -jar jittest-1.0-SNAPSHOT-standalone.jar
Timestamp Compilation Time(ms) Code Cache Size(MB) Perm Gen/Metaspace Size(MB) Invocations/s
1456788247368 2880 5 5 27
1456788248374 6144 7 6 57
1456788249375 8445 8 6 81
1456788250377 9907 9 6 99
1456788251378 12090 8 7 124
1456788252380 13760 9 7 134
== Run with 8u25 ==
$ /usr/local/jdk-8u25-64/bin/java -XX:+PrintCodeCache -XX:+UseG1GC -mx20g -ms20g -XX:ReservedCodeCacheSize=20m -jar jittest-1.0-SNAPSHOT-standalone.jar
Timestamp Compilation Time Code Cache Size Perm Gen/Metaspace Size Invocations/s
1456786460321 1918 6.0 6.0 31.98
1456786461337 4864 8.0 6.0 62.12
1456786462338 6585 9.0 7.0 84.19
1456786463339 8108 9.0 7.0 101.52
1456786464340 10053 10.0 8.0 124.16
1456786465341 12062 10.0 8.0 140.01
1456786466341 14310 11.0 9.0 156.02