yunpengn/dss.ri

## dss.ri
-- Sccsid:     @(#)dss.ri	2.1.8.1
-- TPC-H Benchmark Version 8.0

-- For table REGION
ALTER TABLE REGION
	ADD PRIMARY KEY (R_REGIONKEY);

-- For table NATION
ALTER TABLE NATION
	ADD PRIMARY KEY (N_NATIONKEY);

ALTER TABLE NATION
	ADD CONSTRAINT NATION_FK1
	FOREIGN KEY (N_REGIONKEY) references REGION;

-- For table PART
ALTER TABLE PART
	ADD PRIMARY KEY (P_PARTKEY);

-- For table SUPPLIER
ALTER TABLE SUPPLIER
	ADD PRIMARY KEY (S_SUPPKEY);

ALTER TABLE SUPPLIER
	ADD CONSTRAINT SUPPLIER_FK1
	FOREIGN KEY (S_NATIONKEY) references NATION;

-- For table PARTSUPP
ALTER TABLE PARTSUPP
	ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY);

-- For table CUSTOMER
ALTER TABLE CUSTOMER
	ADD PRIMARY KEY (C_CUSTKEY);

ALTER TABLE CUSTOMER
	ADD CONSTRAINT CUSTOMER_FK1
	FOREIGN KEY (C_NATIONKEY) references NATION;

-- For table LINEITEM
ALTER TABLE LINEITEM
	ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER);

-- For table ORDERS
ALTER TABLE ORDERS
	ADD PRIMARY KEY (O_ORDERKEY);

-- For table PARTSUPP
ALTER TABLE PARTSUPP
	ADD CONSTRAINT PARTSUPP_FK1
	FOREIGN KEY (PS_SUPPKEY) references SUPPLIER;

ALTER TABLE PARTSUPP
	ADD CONSTRAINT PARTSUPP_FK2
	FOREIGN KEY (PS_PARTKEY) references PART;

-- For table ORDERS
ALTER TABLE ORDERS
	ADD CONSTRAINT ORDERS_FK1
	FOREIGN KEY (O_CUSTKEY) references CUSTOMER;

-- For table LINEITEM
ALTER TABLE LINEITEM
	ADD CONSTRAINT LINEITEM_FK1
	FOREIGN KEY (L_ORDERKEY) references ORDERS;

ALTER TABLE LINEITEM
	ADD CONSTRAINT LINEITEM_FK2
	FOREIGN KEY (L_PARTKEY,L_SUPPKEY) references PARTSUPP;

## gen-tcp-H.md

      
    Raw
  

              gen-tcp-H.md
            
          
    How to generate dataset using TPC-H

This is a step-by-step guide on how to generate dataset using TPC-H.

Download the database generation tool from here.
To generate TPC-H compliant datasets, we must use the dbgen tool.
Compile the dbgen tool by make -f makefile.suite.

Remember to modify makefile.suite.


CC       = gcc
DATABASE = INFORMIX
MACHINE  = LINUX
WORKLOAD = TPCH

Use the dbgen tool with the following options:

For example, you can use ./dbgen -s 1 -v


option  argument    default     action
------  --------    -------     ------
-h                              Display a usage summary

-f      none                    Force. Existing data files will be overwritten.

-F      none        yes         Flat file output.

-D      none                    Direct database load. ld_XXXX() routines must be defined in load_stub.c

-s      <scale>     1           Scale of the database population. Scale 1.0 represents ~1 GB of data

-T      <table>                 Generate the data for a particular table ONLY. Arguments: 
                                p -- part/partuspp, 
                                c -- customer, s -- supplier, 
                                o -- orders/lineitem, n -- nation, r -- region,
                                l -- code (same as n and r),
                                O -- orders, L -- lineitem, P -- part, 
                                S -- partsupp

-O      d                       Generate SQL for delete function instead of key ranges

-O      f                       Allow over-ride of default output file names

-O      h                       Generate headers in flat ascii files. hd_XXX routines must be defined in load_stub.c

-O      m                       Flat files generate fixed length records

-O      r                       Generate key ranges for the UF2 update function

-O      v                       Verify data set without generating it.

-r      <percentage>     10     Scale each udpate file to the given percentage (expressed in basis points) of the data set

-v      none                    Verbose. Progress messages are displayed as data is generated.

-n      <name>                  Use database <name> for in-line load

-C      <children>              Use <children> separate processes to generate data

-S      <n>                     Generate the <n>th part of a multi-part load or update set

-U      <updates>               Create a specified number of data sets in flat files for the update/delete functions

-i      <n>                     Split the inserted rows in an refresh pair between <n> files

-d      <n>                     Split the deleted rows in an refresh pair between <n> files


Now, you should see a few XXX.tbl files. However, there is a bug in dbgen which generates an extra | at the end of each line. To fix it, run the following command:

for i in `ls *.tbl`; do sed 's/|$//' $i > ${i/tbl/csv}; echo $i; done;

Assume you have a PostgreSQL instance. Now load the data into it:

# Creates the schema.
psql -c "DROP DATABASE IF EXISTS tpc"
psql -c "CREATE DATABASE tpc"
psql -d tpc -f dss.ddl

# Adds primary keys & foreign keys.
psql -d tpc -f dss.ri

# Loads data.
psql -d tpc -c "COPY region FROM 'region.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY nation FROM 'nation.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY customer FROM 'customer.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY supplier FROM 'supplier.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY part FROM 'part.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY partsupp FROM 'partsupp.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY orders FROM 'orders.csv' WITH (FORMAT csv, DELIMITER '|')";
psql -d tpc -c "COPY lineitem FROM 'lineitem.csv' WITH (FORMAT csv, DELIMITER '|')";
	-- Sccsid: @(#)dss.ri 2.1.8.1
	-- TPC-H Benchmark Version 8.0

	-- For table REGION
	ALTER TABLE REGION
	ADD PRIMARY KEY (R_REGIONKEY);

	-- For table NATION
	ALTER TABLE NATION
	ADD PRIMARY KEY (N_NATIONKEY);

	ALTER TABLE NATION
	ADD CONSTRAINT NATION_FK1
	FOREIGN KEY (N_REGIONKEY) references REGION;

	-- For table PART
	ALTER TABLE PART
	ADD PRIMARY KEY (P_PARTKEY);

	-- For table SUPPLIER
	ALTER TABLE SUPPLIER
	ADD PRIMARY KEY (S_SUPPKEY);

	ALTER TABLE SUPPLIER
	ADD CONSTRAINT SUPPLIER_FK1
	FOREIGN KEY (S_NATIONKEY) references NATION;

	-- For table PARTSUPP
	ALTER TABLE PARTSUPP
	ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY);

	-- For table CUSTOMER
	ALTER TABLE CUSTOMER
	ADD PRIMARY KEY (C_CUSTKEY);

	ALTER TABLE CUSTOMER
	ADD CONSTRAINT CUSTOMER_FK1
	FOREIGN KEY (C_NATIONKEY) references NATION;

	-- For table LINEITEM
	ALTER TABLE LINEITEM
	ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER);

	-- For table ORDERS
	ALTER TABLE ORDERS
	ADD PRIMARY KEY (O_ORDERKEY);

	-- For table PARTSUPP
	ALTER TABLE PARTSUPP
	ADD CONSTRAINT PARTSUPP_FK1
	FOREIGN KEY (PS_SUPPKEY) references SUPPLIER;

	ALTER TABLE PARTSUPP
	ADD CONSTRAINT PARTSUPP_FK2
	FOREIGN KEY (PS_PARTKEY) references PART;

	-- For table ORDERS
	ALTER TABLE ORDERS
	ADD CONSTRAINT ORDERS_FK1
	FOREIGN KEY (O_CUSTKEY) references CUSTOMER;

	-- For table LINEITEM
	ALTER TABLE LINEITEM
	ADD CONSTRAINT LINEITEM_FK1
	FOREIGN KEY (L_ORDERKEY) references ORDERS;

	ALTER TABLE LINEITEM
	ADD CONSTRAINT LINEITEM_FK2
	FOREIGN KEY (L_PARTKEY,L_SUPPKEY) references PARTSUPP;