Skip to content

Instantly share code, notes, and snippets.

@diana-shealy
Created April 7, 2016 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save diana-shealy/81c7a847401757f2dabda945d02a2287 to your computer and use it in GitHub Desktop.
Save diana-shealy/81c7a847401757f2dabda945d02a2287 to your computer and use it in GitHub Desktop.
Convert test data into a vector
WITH train_test as (
select
1 as train_first, false as output_row,
rowid, stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
from
training2
union all
select
2 as train_first, true as output_row,
rowid, stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
from
testing2
),
train_test_ordered as (
select * from train_test
order by train_first asc, rowid asc
),
test_quantified as (
select
t1.rowid,
t2.*
from
train_test_ordered t1
LATERAL VIEW quantify(output_row,
stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
) t2 as stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
)
INSERT OVERWRITE TABLE testing3
SELECT
t3.rowid, t4.id,
ARRAY(
t3.stateholiday, t3.store, t3.promo, t3.dayofweek, t3.schoolholiday,
IF(t3.promo2sinceweek IS NULL, 0, t3.promo2sinceweek),
IF(t3.competitionopensinceyear IS NULL, 0, t3.competitionopensinceyear),
t3.assortment,
IF(t3.promo2sinceyear IS NULL, 0, t3.promo2sinceyear),
IF(t3.promointerval IS NULL, 0, t3.promointerval),
t3.promo2, t3.storetype,
IF(t3.competitionopensincemonth IS NULL, 0, t3.competitionopensincemonth),
t3.year, t3.month, t3.day,
IF(t3.competitiondistance IS NULL, 0, t3.competitiondistance)
) AS features
FROM
test_quantified t3
LEFT OUTER JOIN testing2 t4 ON (t3.rowid=t4.rowid);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment