Skip to content

Instantly share code, notes, and snippets.

@robrich
Created June 23, 2020 04:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save robrich/e873b5d347c623888b32689b821452c9 to your computer and use it in GitHub Desktop.
Save robrich/e873b5d347c623888b32689b821452c9 to your computer and use it in GitHub Desktop.
Reference Tables
create database 'tpc_h';
use 'tpc_h';
CREATE TABLE IF NOT EXISTS `lineitem` (
`l_orderkey` bigint(11) NOT NULL,
`l_partkey` int(11) NOT NULL,
`l_suppkey` int(11) NOT NULL,
`l_linenumber` int(11) NOT NULL,
`l_quantity` decimal(15,2) NOT NULL,
`l_extendedprice` decimal(15,2) NOT NULL,
`l_discount` decimal(15,2) NOT NULL,
`l_tax` decimal(15,2) NOT NULL,
`l_returnflag` char(1) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`l_linestatus` char(1) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`l_shipdate` date NOT NULL,
`l_commitdate` date NOT NULL,
`l_receiptdate` date NOT NULL,
`l_shipinstruct` char(25) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`l_shipmode` char(10) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`l_comment` varchar(44) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
shard key (`l_orderkey`),
primary key (`l_orderkey`, `l_linenumber`) -- no primary key in columnstore
);
CREATE OR REPLACE PIPELINE tpch_lineitem
AS LOAD DATA S3 'memsql-tpch-dataset/sf_100/lineitem/'
config '{"region":"us-east-1"}'
SKIP DUPLICATE KEY ERRORS
INTO TABLE lineitem
FIELDS TERMINATED BY '|'
LINES TERMINATED BY '|\n';
ALTER PIPELINE tpch_lineitem SET OFFSETS EARLIEST;
start pipeline tpch_lineitem;
show pipelines;
select count(*) from lineitem;
stop pipeline tpch_lineitem;
select * from lineitem;
-- now let's normalize it:
select distinct l_shipmode from lineitem order by l_shipmode;
create reference table shipmode (
shipmodekey int(11) not null,
shipmodedesc varchar(10) CHARACTER SET utf8 COLLATE utf8_general_ci not null,
primary key (shipmodekey)
);
insert into shipmode (shipmodekey, shipmodedesc) values
(1, 'AIR'), (2, 'FOB'), (3, 'MAIL'), (4, 'RAIL'),
(5, 'REG AIR'), (6, 'SHIP'), (7, 'TRUCK');
select *
from lineitem
inner join shipmode on lineitem.l_shipmode = shipmode.shipmodedesc;
alter table lineitem
add column l_shipmodekey int null;
update lineitem
inner join shipmode on lineitem.l_shipmode = shipmode.shipmodedesc
set lineitem.l_shipmodekey = shipmode.shipmodekey;
select * from lineitem;
select distinct l_shipinstruct from lineitem order by l_shipinstruct;
create reference table shipinstructions (
shipinstructionskey int(11) not null,
shipinstructionsdesc varchar(25) CHARACTER SET utf8 COLLATE utf8_general_ci not null,
primary key (shipinstructionskey)
);
insert into shipinstructions (shipinstructionskey, shipinstructionsdesc)
values (1, 'COLLECT COD'), (2, 'DELIVER IN PERSON'), (3, 'NONE'), (4, 'TAKE BACK RETURN');
select *
from lineitem
inner join shipinstructions on lineitem.l_shipinstruct = shipinstructions.shipinstructionsdesc;
alter table lineitem
add column l_shipinstructionskey int null;
update lineitem
inner join shipinstructions on lineitem.l_shipinstruct = shipinstructions.shipinstructionsdesc
set lineitem.l_shipinstructionskey = shipinstructions.shipinstructionskey;
select * from lineitem;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment