Skip to content

Instantly share code, notes, and snippets.

select * from foo tablesample system (10)
(QUERY
(QUERY_SPEC
(select (SELECT_LIST ALL_COLUMNS))
(from
(SAMPLED_RELATION
(TABLE (QNAME foo)) system 10))))
Query{queryBody=QuerySpecification{select=Select{distinct=false, selectItems=[*]}, from=[SampledRelation{relation=Table{foo}, type=SYSTEM, samplePercentage=10}], where=null, groupBy=[], having=null, orderBy=[], limit=null}, orderBy=[]}
@nileema
nileema / gist:6529739
Created September 11, 2013 21:02
tablesample examples
presto:default> select * from presto_test tablesample SYSTEM (10);
t_string | t_tinyint | t_smallint | t_int | t_bigint | t_float | t_double | t_map | t_boolean | t_ti
-------------------+-----------+------------+-------+----------+--------------------+----------+---------------------------+-----------+-----
sequencefile test | -54 | 203 | 204 | 205 | 206.10000610351562 | 207.2 | {"format":"sequencefile"} | true | 130
sequencefile test | -53 | 204 | 205 | 206 | 207.10000610351562 | 208.2 | {"format":"sequencefile"} | NULL | 130
sequencefile test | -52 | 205 | 206 | 207 | 208.10000610351562 | 209.2 | {"format":"sequencefile"} | false | 130
sequencefile test | -51 | 206 | 207 | 208 | 209.10000610351562 | 210.2 | {"format":"sequencefile"} | true | 130
sequencefile test | -50 | 207 | 208 | 209 | 210.10000610351562 | 211.2 | {"format
@nileema
nileema / gist:6677516
Created September 23, 2013 21:56
list bucketing, skewed tables
hive:di> create table test_nileema_skewed (c1 int, c2 int, c3 string) skewed by (c1) on (5) ;
OK
Time taken: 5.572 seconds
hive:di> desc formatted test_nileema_skewed;
OK
# col_name data_type comment
c1 int None
c2 int None
c3 string None
presto:default> select is_nan(nan());
_col0
-------
true
(1 row)
presto:default> select nan();
_col0
-------
2014-02-03T14:39:54.703-0800 DEBUG task-notification-85 com.facebook.presto.execution.TaskStateMachine Task 20140203_223943_05050_tf4an.1.28 is CANCELED
2014-02-03T14:39:54.890-0800 INFO task-notification-84 stderr Exception in thread "task-notification-84"
2014-02-03T14:39:54.891-0800 INFO task-notification-84 stderr java.lang.NullPointerException
2014-02-03T14:39:54.891-0800 INFO task-notification-84 stderr at com.facebook.presto.execution.SqlTaskExecution$3.onFailure(SqlTaskExecution.java:432)
2014-02-03T14:39:54.891-0800 INFO task-notification-84 stderr at com.google.common.util.concurrent.Futures$4.run(Futures.java:1160)
2014-02-03T14:39:54.891-0800 INFO task-notification-84 stderr at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
2014-02-03T14:39:54.891-0800 INFO task-notification-84 stderr at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
struct BlacklistEntry {
1: optional string user;
2: optional string source;
3: optional string host;
}
struct PrestoBlacklistConfig {
1: list<BlacklistEntry> blacklistedEntries;
}
Input
=======
fbid1,fbid2,name
9,9,z
99,99,zz
999,999,zzz
Output
=========
presto:default> select * from test_loader_column_names;
SELECT * FROM (SELECT row_number() over() rn, orderkey, orderstatus, custkey from orders) where rn < 10
- Output[rn, orderkey, orderstatus, custkey]
rn := row_1
- Window[] => [orderkey:bigint, custkey:bigint, orderstatus:varchar, row_1:bigint]
row_1 := row_number()
- Limit[9] => [orderkey:bigint, custkey:bigint, orderstatus:varchar]
- TableScan[local:tpch:orders:sf0.01, original constraint=true] => [orderkey:bigint, custkey:bigint, orderstatus:varchar]
orderkey := local:tpch:orderkey:0
custkey := local:tpch:custkey:1
orderstatus := local:tpch:orderstatus:2
presto:tiny> explain SELECT * FROM (SELECT row_number() over() rn, orderkey, orderstatus, custkey from orders) where rn < 10
-> ;
Query Plan
--------------------------------------------------------------------------------------------------------------------------------------
- Output[rn, orderkey, orderstatus, custkey]
rn := row_1
- Filter[("row_1" < 10)] => [orderkey:bigint, custkey:bigint, orderstatus:varchar, row_1:bigint]
- RowNumber[] => [orderkey:bigint, custkey:bigint, orderstatus:varchar, row_1:bigint]
row_1 := row_number()
- TableScan[tpch:tpch:orders:sf0.01, original constraint=true] => [orderkey:bigint, custkey:bigint, orderstatus:varchar]
=== Run with changes to groupByHash ===
Benchmark Mode Samples Score Score error Units
c.f.p.o.GroupByHashBenchmark.testContains avgt 10 0.000 0.000 ms/op
c.f.p.o.GroupByHashBenchmark.testExistingGroupIds avgt 10 0.006 0.000 ms/op // All groupIds already exist in group by hash
c.f.p.o.GroupByHashBenchmark.testMixGroupIds avgt 10 0.008 0.000 ms/op // Half groupIds exist in group by hash
c.f.p.o.GroupByHashBenchmark.testNewGroupIds avgt 10 0.025 0.000 ms/op // No groupIds exist in group by hash
=== Run on current trunk ===
Benchmark Mode Samples Score Score error Units
c.f.p.o.GroupByHashBenchmark.testContains avgt 10 0.000 0.000 ms/op