Skip to content

Instantly share code, notes, and snippets.

@zhjwpku
Created October 4, 2022 05:01
Show Gist options
  • Save zhjwpku/d8ffe0f37a8fb425c226b6f071187f36 to your computer and use it in GitHub Desktop.
Save zhjwpku/d8ffe0f37a8fb425c226b6f071187f36 to your computer and use it in GitHub Desktop.
Greenplum tpch lz4 compression test.
CREATE TABLE customer_lz4 (
c_custkey integer NOT NULL,
c_name character varying(25) NOT NULL,
c_address character varying(40) NOT NULL,
c_nationkey integer NOT NULL,
c_phone character(15) NOT NULL,
c_acctbal numeric(15,2) NOT NULL,
c_mktsegment character(10) NOT NULL,
c_comment character varying(117) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (c_custkey);
CREATE TABLE lineitem_lz4 (
l_orderkey bigint NOT NULL,
l_partkey integer NOT NULL,
l_suppkey integer NOT NULL,
l_linenumber integer NOT NULL,
l_quantity numeric(15,2) NOT NULL,
l_extendedprice numeric(15,2) NOT NULL,
l_discount numeric(15,2) NOT NULL,
l_tax numeric(15,2) NOT NULL,
l_returnflag character(1) NOT NULL,
l_linestatus character(1) NOT NULL,
l_shipdate date NOT NULL,
l_commitdate date NOT NULL,
l_receiptdate date NOT NULL,
l_shipinstruct character(25) NOT NULL,
l_shipmode char(10) NOT NULL,
l_comment varchar(44) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (l_orderkey);
CREATE TABLE nation_lz4 (
n_nationkey integer NOT NULL,
n_name character(25) NOT NULL,
n_regionkey integer NOT NULL,
n_comment character varying(152)
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (n_nationkey);
CREATE TABLE orders_lz4 (
o_orderkey bigint NOT NULL,
o_custkey integer NOT NULL,
o_orderstatus character(1) NOT NULL,
o_totalprice numeric(15,2) NOT NULL,
o_orderdate date NOT NULL,
o_orderpriority character(15) NOT NULL,
o_clerk character(15) NOT NULL,
o_shippriority integer NOT NULL,
o_comment character varying(79) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (o_orderkey);
CREATE TABLE part_lz4 (
p_partkey integer NOT NULL,
p_name character varying(55) NOT NULL,
p_mfgr character(25) NOT NULL,
p_brand character(10) NOT NULL,
p_type character varying(25) NOT NULL,
p_size integer NOT NULL,
p_container character(10) NOT NULL,
p_retailprice numeric(15,2) NOT NULL,
p_comment character varying(23) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (p_partkey);
CREATE TABLE partsupp_lz4 (
ps_partkey integer NOT NULL,
ps_suppkey integer NOT NULL,
ps_availqty integer NOT NULL,
ps_supplycost numeric(15,2) NOT NULL,
ps_comment character varying(199) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (ps_partkey);
CREATE TABLE region_lz4 (
r_regionkey integer NOT NULL,
r_name character(25) NOT NULL,
r_comment character varying(152)
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (r_regionkey);
CREATE TABLE supplier_lz4 (
s_suppkey integer NOT NULL,
s_name character(25) NOT NULL,
s_address character varying(40) NOT NULL,
s_nationkey integer NOT NULL,
s_phone character(15) NOT NULL,
s_acctbal numeric(15,2) NOT NULL,
s_comment character varying(101) NOT NULL
) with (appendoptimized=true, compresstype=lz4, compresslevel = 1, orientation=column) distributed by (s_suppkey);
\timing on
copy customer_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/customer.tbl' with delimiter '|' ;
copy lineitem_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/lineitem.tbl' with delimiter '|' ;
copy nation_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/nation.tbl' with delimiter '|' ;
copy orders_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/orders.tbl' with delimiter '|' ;
copy part_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/part.tbl' with delimiter '|' ;
copy partsupp_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/partsupp.tbl' with delimiter '|' ;
copy region_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/region.tbl' with delimiter '|' ;
copy supplier_lz4 from '/Users/zhjwpku/zhjwpku/data/tpch1G/supplier.tbl' with delimiter '|' ;
SELECT get_ao_compression_ratio('customer_lz4');
SELECT get_ao_compression_ratio('lineitem_lz4');
SELECT get_ao_compression_ratio('nation_lz4');
SELECT get_ao_compression_ratio('orders_lz4');
SELECT get_ao_compression_ratio('part_lz4');
SELECT get_ao_compression_ratio('partsupp_lz4');
SELECT get_ao_compression_ratio('region_lz4');
SELECT get_ao_compression_ratio('supplier_lz4');
-- Query 1
SELECT
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
FROM
lineitem_lz4
WHERE
l_shipdate <= date '1998-12-01' - interval '90' day
GROUP BY
l_returnflag,
l_linestatus
ORDER BY
l_returnflag,
l_linestatus;
-- Query 3
SELECT
l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
o_shippriority
FROM
customer_lz4,
orders_lz4,
lineitem_lz4
WHERE
c_mktsegment = 'BUILDING'
AND c_custkey = o_custkey
AND l_orderkey = o_orderkey
AND o_orderdate < date '1995-03-15'
AND l_shipdate > date '1995-03-15'
GROUP BY
l_orderkey,
o_orderdate,
o_shippriority
ORDER BY
revenue desc,
o_orderdate
LIMIT 20;
-- Query 5
SELECT
n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
FROM
customer_lz4,
orders_lz4,
lineitem_lz4,
supplier_lz4,
nation_lz4,
region_lz4
WHERE
c_custkey = o_custkey
AND l_orderkey = o_orderkey
AND l_suppkey = s_suppkey
AND c_nationkey = s_nationkey
AND s_nationkey = n_nationkey
AND n_regionkey = r_regionkey
AND r_name = 'ASIA'
AND o_orderdate >= date '1994-01-01'
AND o_orderdate < date '1994-01-01' + interval '1' year
GROUP BY
n_name
ORDER BY
revenue desc;
-- Query 6
SELECT
sum(l_extendedprice * l_discount) as revenue
FROM
lineitem_lz4
WHERE
l_shipdate >= date '1994-01-01'
AND l_shipdate < date '1994-01-01' + interval '1' year
AND l_discount between 0.06 - 0.01 AND 0.06 + 0.01
AND l_quantity < 24;
-- Query 10
SELECT
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
FROM
customer_lz4,
orders_lz4,
lineitem_lz4,
nation_lz4
WHERE
c_custkey = o_custkey
AND l_orderkey = o_orderkey
AND o_orderdate >= date '1993-10-01'
AND o_orderdate < date '1993-10-01' + interval '3' month
AND l_returnflag = 'R'
AND c_nationkey = n_nationkey
GROUP BY
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
ORDER BY
revenue desc
LIMIT 20;
-- Query 12
SELECT
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
OR o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
AND o_orderpriority <> '2-HIGH'
then 1
else 0
end) AS low_line_count
FROM
orders_lz4,
lineitem_lz4
WHERE
o_orderkey = l_orderkey
AND l_shipmode in ('MAIL', 'SHIP')
AND l_commitdate < l_receiptdate
AND l_shipdate < l_commitdate
AND l_receiptdate >= date '1994-01-01'
AND l_receiptdate < date '1994-01-01' + interval '1' year
GROUP BY
l_shipmode
ORDER BY
l_shipmode;
-- Query 14
SELECT
100.00 * sum(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
FROM
lineitem_lz4,
part_lz4
WHERE
l_partkey = p_partkey
AND l_shipdate >= date '1995-09-01'
AND l_shipdate < date '1995-09-01' + interval '1' month;
-- Query 19
SELECT
sum(l_extendedprice* (1 - l_discount)) as revenue
FROM
lineitem_lz4,
part_lz4
WHERE
(
p_partkey = l_partkey
AND p_brand = 'Brand#12'
AND p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND l_quantity >= 1 AND l_quantity <= 1 + 10
AND p_size between 1 AND 5
AND l_shipmode in ('AIR', 'AIR REG')
AND l_shipinstruct = 'DELIVER IN PERSON'
)
OR
(
p_partkey = l_partkey
AND p_brand = 'Brand#23'
AND p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND l_quantity >= 10 AND l_quantity <= 10 + 10
AND p_size between 1 AND 10
AND l_shipmode in ('AIR', 'AIR REG')
AND l_shipinstruct = 'DELIVER IN PERSON'
)
OR
(
p_partkey = l_partkey
AND p_brand = 'Brand#34'
AND p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND l_quantity >= 20 AND l_quantity <= 20 + 10
AND p_size between 1 AND 15
AND l_shipmode in ('AIR', 'AIR REG')
AND l_shipinstruct = 'DELIVER IN PERSON'
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment