Created
April 7, 2016 21:43
-
-
Save diana-shealy/16cc759fe13d0ee0ca800ce3d3488986 to your computer and use it in GitHub Desktop.
Create the Training Feature Vector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH train_ordered as ( | |
select * from training2 | |
order by rowid asc | |
), | |
train_quantified as ( | |
select | |
t0.rowid, | |
t0.sales, | |
t2.*, | |
t0.competitiondistance | |
from | |
train_ordered t0 | |
-- indexing non-number columns | |
LATERAL VIEW quantify(true, | |
stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day) t2 | |
as stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day | |
) | |
INSERT OVERWRITE TABLE training3 | |
SELECT | |
t1.rowid, | |
ARRAY( -- padding zero for missing values | |
t1.stateholiday, t1.store, t1.promo, t1.dayofweek, t1.schoolholiday, | |
IF(t1.promo2sinceweek IS NULL, 0, t1.promo2sinceweek), | |
IF(t1.competitionopensinceyear IS NULL, 0, t1.competitionopensinceyear), | |
t1.assortment, | |
IF(t1.promo2sinceyear IS NULL, 0, t1.promo2sinceyear), | |
IF(t1.promointerval IS NULL, 0, t1.promointerval), | |
t1.promo2, t1.storetype, | |
IF(t1.competitionopensincemonth IS NULL, 0, t1.competitionopensincemonth), | |
t1.year, t1.month, t1.day, | |
IF(t1.competitiondistance IS NULL, 0, t1.competitiondistance) | |
) AS features, | |
LN(1 + t1.sales) AS label, -- log scale conversion | |
t1.sales | |
FROM | |
train_quantified t1 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment