Skip to content

Instantly share code, notes, and snippets.

@dkobia
Forked from steveodom/athena_cheatsheet.md
Last active February 6, 2019 11:02
Show Gist options
  • Save dkobia/a32159a1c24b0ab1f018a7fa0765abe7 to your computer and use it in GitHub Desktop.
Save dkobia/a32159a1c24b0ab1f018a7fa0765abe7 to your computer and use it in GitHub Desktop.
AWS Athena / Hive / Presto Cheatsheet

Useful Links / Sources

Housekeeping

change column type

ALTER TABLE logs.trades CHANGE recentprice price int;

rename column

This is not supported by Athena apparently. You can drop the table and recreate it with the right column name.

ALTER TABLE trading_features.models RENAME COLUMN "indexchnge-20" TO "indexchange-20"

add column

ALTER TABLE logs.trades ADD COLUMNS (column1 string, column2 string);

Show Partitions

SHOW PARTITIONS logs.trades

Drop Partition

ALTER TABLE logs.trades DROP PARTITION (year='2017',week='22',day='We')

Drop Table

DROP TABLE IF EXISTS logs.trades

Counts

DISTINCT

select count( distinct ticker) from trading_features.features

Math

Division

I found that I had to cast each integer as doubles to divide and get two decimals.

SELECT round(cast(down AS double) / cast(total AS double),2) AS down FROM trades

Percentiles

SELECT 
 approx_percentile(close, 0.15) as low, 
 approx_percentile(close, 0.5) as mid, 
 approx_percentile(close, 0.85) as high 
FROM tablename

Selects

Starts With

select ticker from trading_features.features where upper(ticker) like '%A%'

Sums/Group By

SELECT * FROM
  (SELECT ticker,
         count(*) AS total_trades,
         sum( IF(profit>0,
          1,
          0) ) AS plus,
         sum(IF(profit<0,
           1,
           0)) AS minus,
         cast(sum( IF(profit>0,
          1,
         0) ) AS double) / cast(count(*) AS double) AS pct
  FROM 
      (SELECT ticker,
           id,
           sum(price) AS profit
      FROM logs.trades
      GROUP BY  id, ticker) AS trades
  GROUP BY  ticker) as totals
WHERE total_trades > 20
ORDER BY pct DESC
# Sums/Group By (2)
SELECT ticker,
         count(*) AS total_trades,
         sum(
           IF(profit>0,1,0)
         ) AS ups,
         sum(IF(profit<0,
         1,
         0)) AS downs
FROM 
    (SELECT ticker,
         id,
         sum(price) AS profit
    FROM logs.trades
    GROUP BY  id, ticker) AS trades
    GROUP BY  ticker
ORDER BY ups DESC
ORDER BY total_trades

With Example (Nicer)

WITH 
  grouped AS (SELECT 
   "mxc-1" as sequence,
   count(*) AS total,
         sum( IF(change='"down-flat"',
          1,
          0) ) AS df,
         sum(IF(change='"up-flat"',
           1,
           0)) AS uf,
         sum(IF(change='"up"',
           1,
           0)) AS up,
         sum(IF(change='"down"',
           1,
           0)) AS down
  FROM trading_features.models
  GROUP BY "mxc-1", change)
SELECT 
 grouped.sequence, 
 sum(grouped.total)as total, 
 sum(grouped.down) as down, 
 sum(grouped.df) as df, 
 sum(grouped.uf) as uf, 
 sum(grouped.up) as up
FROM grouped

Nested

SELECT ticker, 
       sum(profit) as total_profit
FROM
  (SELECT 
  ticker,
  id,
       sum(price) as profit
  FROM logs.trades
  GROUP BY  id, ticker) as trades
 GROUP BY ticker
 ORDER BY total_profit DESC

Create Tables

1.

CREATE EXTERNAL TABLE IF NOT EXISTS logs.trades (
  `id` string,
  `model` string,
  `side` string,
  `ticker` string,
  `tickindex` string,
  `time` string,
  `recentprice` string 
) PARTITIONED BY (
  year string,
  weekOfYear string,
  dayOfWeek string 
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1'
) LOCATION 's3://trading-logs-trades/trades/'
TBLPROPERTIES ('has_encrypted_data'='false')

Workarounds

Column Names with hyphens

Use ""

SELECT "mxc-1",
FROM trading_features.models
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment