Skip to content

Instantly share code, notes, and snippets.

View diana-shealy's full-sized avatar

Diana Shealy diana-shealy

View GitHub Profile
SELECT
TD_TIME_FORMAT(time, ‘yyyy-MM’) as MONTH,
category,
MAX(usage) as total_max_usase
FROM usage_snapshot
SELECT
TD_TIME_FORMAT(time, ‘yyyy-MM’) as MONTH,
category,
usage
FROM usage_snapshot
WHERE TD_TIME_RANGE(time, ‘2016-04-01’)
SELECT
Month,
CONCAT(Group1, “-”, Group2) as NewGroup,
SUM(Usage) as total_usage
FROM usage
GROUP BY 1, 2
SELECT
Month,
Group1,
Group2,
Group3,
CONCAT(Group1, “-”, Group2) as NewGroup,
SUM(Usage) as total_usage
FROM usage
GROUP BY 1, 2, 3, 4, 5
@diana-shealy
diana-shealy / submission.sql
Created April 7, 2016 22:00
Sorting the Results
SELECT
id,
predicted as sales
FROM
prediction
ORDER BY id ASC;
@diana-shealy
diana-shealy / prediction.sql
Created April 7, 2016 21:59
Create the Predictions
INSERT OVERWRITE TABLE prediction
SELECT
id,
EXP(predicted)-1 as predicted
FROM(
SELECT
id,
avg(predicted) AS predicted
FROM(
SELECT
@diana-shealy
diana-shealy / make_model.sql
Created April 7, 2016 21:50
Create the model
INSERT OVERWRITE TABLE model
SELECT
-- C: Categorical Variable, Q: Quantitative Variable
train_randomforest_regr(features, label, '-trees 20 -attrs C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,Q')
FROM
training3
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20 -attrs C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,Q')
FROM
@diana-shealy
diana-shealy / test_quantify.sql
Created April 7, 2016 21:49
Convert test data into a vector
WITH train_test as (
select
1 as train_first, false as output_row,
rowid, stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
from
training2
union all
select
2 as train_first, true as output_row,
rowid, stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
@diana-shealy
diana-shealy / test_join.sql
Created April 7, 2016 21:46
Joining the testing data with store information
INSERT OVERWRITE TABLE testing2
SELECT
rowid() as rowid,
t1.id,
t1.stateholiday, t1.store, t1.promo, t1.dayofweek, t1.date, t1.schoolholiday,
SUBSTR(t1.date,1,4) as year,
SUBSTR(t1.date,6,2) as month,
SUBSTR(t1.date,9,2) as day,
t2.promo2sinceweek,
t2.competitionopensinceyear,
@diana-shealy
diana-shealy / train_quantify.sql
Created April 7, 2016 21:43
Create the Training Feature Vector
WITH train_ordered as (
select * from training2
order by rowid asc
),
train_quantified as (
select
t0.rowid,
t0.sales,
t2.*,
t0.competitiondistance