I hereby claim:
- I am gregrahn on github.
- I am gregrahn (https://keybase.io/gregrahn) on keybase.
- I have a public key whose fingerprint is 9C32 D323 4E55 8113 FE4B CFEB FA4D 0C79 A267 C6C4
To claim this, I am signing this object:
#!/usr/bin/env python | |
# encoding: utf-8 | |
import sys | |
import urllib | |
import codecs | |
import json | |
import unicodecsv | |
import dateutil.parser as parser |
import time | |
import sys | |
import os | |
cmd = "./dsqgen -QUIET Y -DIALECT netezza -SCALE 100000 -TEMPLATE crash.tpl -DIRECTORY ../query_templates" | |
template1 = ''' | |
define C= ulist(random(1, {}, uniform),5); | |
select |
cask 'cloudera-impala-odbc' do | |
version '2.6.8.1008' | |
sha256 'dbc9a460004b93b39c1d9bfedf2c181226ee629e59c84774708ffd8fd4585a8d' | |
url "https://downloads.cloudera.com/connectors/ClouderaImpala_ODBC_#{version}/macOS/ClouderaImpalaODBC.dmg" | |
name 'Cloudera ODBC Driver for Impala' | |
homepage 'https://www.cloudera.com/downloads' | |
pkg 'ClouderaImpalaODBC.pkg' |
-- | |
-- list db/table/num_rows for tables with stats | |
-- | |
SELECT | |
DBS.NAME AS DB_NAME, | |
TBLS.TBL_NAME AS TABLE_NAME, | |
TABLE_PARAMS.PARAM_VALUE as NUM_ROWS | |
FROM DBS | |
JOIN TBLS USING (DB_ID) | |
JOIN TABLE_PARAMS USING (TBL_ID) |
-- Build a small three-column integer table, load five rows of 0/1
-- combinations, and read them back.
create table rj (l1 int, l2 int, l3 int);

-- Column lists are spelled out so each value's target column is explicit
-- and the inserts do not depend on the table's physical column order.
insert into rj (l1, l2, l3) values (0, 0, 0);
insert into rj (l1, l2, l3) values (1, 1, 1);
insert into rj (l1, l2, l3) values (1, 0, 1);
insert into rj (l1, l2, l3) values (0, 1, 0);
insert into rj (l1, l2, l3) values (1, 0, 0);

-- Same columns, same order as the table definition; no ORDER BY is given,
-- so the row order of the result is whatever the engine returns.
select l1, l2, l3 from rj;
l1 | l2 | l3 |
-- TPC-DS v2.11.0 | |
-- ./dsqgen -DIRECTORY ../query_templates -INPUT ../query_templates/templates.lst -VERBOSE Y -QUALIFY Y -SCALE 10000 -DIALECT netezza -OUTPUT_DIR /tmp | |
-- start query 1 in stream 0 using template query1.tpl | |
with customer_total_return as | |
(select sr_customer_sk as ctr_customer_sk | |
,sr_store_sk as ctr_store_sk | |
,sum(SR_FEE) as ctr_total_return | |
from store_returns | |
,date_dim |
Three comparison points: | |
Presto + RCFile vs Impala + RCFile vs Impala + Parquet | |
Metrics to note: query time, CPU utilization, and disk read throughput (KBRead)
Impala v1.1.1 | |
Presto v0.52 | |
================================================================================================================================ | |
Presto + RCFile: | |
select ss_sold_date_sk, count(*) from store_sales_rcfile group by 1 order by 1 limit 2000; |
-- Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production | |
-- Using 1GB TPC-DS | |
-- Table DDL: https://raw.githubusercontent.com/gregrahn/tpcds-kit/master/tools/tpcds.sql | |
-- WinMagic paper: "WinMagic: subquery elimination using window aggregation" | |
-- https://pdfs.semanticscholar.org/0bfa/e505ad588d00d4b204acf8ba4b5646eac244.pdf | |
alter session set nls_date_format = 'YYYY-MM-DD'; | |
-- start query 1 in stream 0 using template query92.tpl |
I hereby claim:
To claim this, I am signing this object:
/* Instructions on compilation and execution | |
* ========================================= | |
* | |
* Compile this program with pthreads: | |
* | |
* g++ -Wall -lpthread -o graphdb-simulator graphdb-simulator.cpp | |
* | |
* Before you run this program, you need to create the following | |
* directories: | |
* |