Created
March 10, 2016 07:08
-
-
Save puyokw/4c61e8db419aaa72a81c to your computer and use it in GitHub Desktop.
td_intern criteo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
INSERT OVERWRITE TABLE fm_test | |
select | |
t2.rowid, | |
concat_array( | |
array_remove(array( | |
if(t2.c1 is null, null, cast( mhash(concat("c1:", t2.c1)) as string) ), | |
if(t2.c2 is null, null, cast( mhash(concat("c2:", t2.c2)) as string) ), | |
if(t2.c3 is null, null, cast( mhash(concat("c3:", t2.c3)) as string) ), | |
if(t2.c4 is null, null, cast( mhash(concat("c4:", t2.c4)) as string) ), | |
if(t2.c5 is null, null, cast( mhash(concat("c5:", t2.c5)) as string) ), | |
if(t2.c6 is null, null, cast( mhash(concat("c6:", t2.c6)) as string) ), | |
if(t2.c7 is null, null, cast( mhash(concat("c7:", t2.c7)) as string) ), | |
if(t2.c8 is null, null, cast( mhash(concat("c8:", t2.c8)) as string) ), | |
if(t2.c9 is null, null, cast( mhash(concat("c9:", t2.c9)) as string) ), | |
if(t2.c10 is null, null, cast( mhash(concat("c10:", t2.c10)) as string) ), | |
if(t2.c11 is null, null, cast( mhash(concat("c11:", t2.c11)) as string) ), | |
if(t2.c12 is null, null, cast( mhash(concat("c12:", t2.c12)) as string) ), | |
if(t2.c13 is null, null, cast( mhash(concat("c13:", t2.c13)) as string) ), | |
if(t2.c14 is null, null, cast( mhash(concat("c14:", t2.c14)) as string) ), | |
if(t2.c15 is null, null, cast( mhash(concat("c15:", t2.c15)) as string) ), | |
if(t2.c16 is null, null, cast( mhash(concat("c16:", t2.c16)) as string) ), | |
if(t2.c17 is null, null, cast( mhash(concat("c17:", t2.c17)) as string) ), | |
if(t2.c18 is null, null, cast( mhash(concat("c18:", t2.c18)) as string) ), | |
if(t2.c19 is null, null, cast( mhash(concat("c19:", t2.c19)) as string) ), | |
if(t2.c20 is null, null, cast( mhash(concat("c20:", t2.c20)) as string) ), | |
if(t2.c21 is null, null, cast( mhash(concat("c21:", t2.c21)) as string) ), | |
if(t2.c22 is null, null, cast( mhash(concat("c22:", t2.c22)) as string) ), | |
if(t2.c23 is null, null, cast( mhash(concat("c23:", t2.c23)) as string) ), | |
if(t2.c24 is null, null, cast( mhash(concat("c24:", t2.c24)) as string) ), | |
if(t2.c25 is null, null, cast( mhash(concat("c25:", t2.c25)) as string) ), | |
if(t2.c26 is null, null, cast( mhash(concat("c26:", t2.c26)) as string) ) | |
), null), | |
t1.features | |
) as features | |
from | |
test_ordered t2 | |
LEFT OUTER JOIN test_quantative t1 ON (t2.rowid=t1.rowid); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
INSERT OVERWRITE TABLE fm_train | |
select | |
t2.rowid, | |
concat_array( | |
array_remove(array( | |
if(t2.c1 is null, null, cast( mhash(concat("c1:", t2.c1)) as string) ), | |
if(t2.c2 is null, null, cast( mhash(concat("c2:", t2.c2)) as string) ), | |
if(t2.c3 is null, null, cast( mhash(concat("c3:", t2.c3)) as string) ), | |
if(t2.c4 is null, null, cast( mhash(concat("c4:", t2.c4)) as string) ), | |
if(t2.c5 is null, null, cast( mhash(concat("c5:", t2.c5)) as string) ), | |
if(t2.c6 is null, null, cast( mhash(concat("c6:", t2.c6)) as string) ), | |
if(t2.c7 is null, null, cast( mhash(concat("c7:", t2.c7)) as string) ), | |
if(t2.c8 is null, null, cast( mhash(concat("c8:", t2.c8)) as string) ), | |
if(t2.c9 is null, null, cast( mhash(concat("c9:", t2.c9)) as string) ), | |
if(t2.c10 is null, null, cast( mhash(concat("c10:", t2.c10)) as string) ), | |
if(t2.c11 is null, null, cast( mhash(concat("c11:", t2.c11)) as string) ), | |
if(t2.c12 is null, null, cast( mhash(concat("c12:", t2.c12)) as string) ), | |
if(t2.c13 is null, null, cast( mhash(concat("c13:", t2.c13)) as string) ), | |
if(t2.c14 is null, null, cast( mhash(concat("c14:", t2.c14)) as string) ), | |
if(t2.c15 is null, null, cast( mhash(concat("c15:", t2.c15)) as string) ), | |
if(t2.c16 is null, null, cast( mhash(concat("c16:", t2.c16)) as string) ), | |
if(t2.c17 is null, null, cast( mhash(concat("c17:", t2.c17)) as string) ), | |
if(t2.c18 is null, null, cast( mhash(concat("c18:", t2.c18)) as string) ), | |
if(t2.c19 is null, null, cast( mhash(concat("c19:", t2.c19)) as string) ), | |
if(t2.c20 is null, null, cast( mhash(concat("c20:", t2.c20)) as string) ), | |
if(t2.c21 is null, null, cast( mhash(concat("c21:", t2.c21)) as string) ), | |
if(t2.c22 is null, null, cast( mhash(concat("c22:", t2.c22)) as string) ), | |
if(t2.c23 is null, null, cast( mhash(concat("c23:", t2.c23)) as string) ), | |
if(t2.c24 is null, null, cast( mhash(concat("c24:", t2.c24)) as string) ), | |
if(t2.c25 is null, null, cast( mhash(concat("c25:", t2.c25)) as string) ), | |
if(t2.c26 is null, null, cast( mhash(concat("c26:", t2.c26)) as string) ) | |
), null), | |
t1.features | |
) as features, | |
t2.label | |
from | |
train_ordered t2 | |
LEFT OUTER JOIN train_quantative t1 ON (t2.rowid=t1.rowid); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH test_nonzero as ( | |
select | |
t2.rowid as rowid, | |
IF( t2.l1 IS NULL, 0, t2.l1) as l1, | |
IF( t2.l2 IS NULL, 0, t2.l2) as l2, | |
IF( t2.l3 IS NULL, 0, t2.l3) as l3, | |
IF( t2.l4 IS NULL, 0, t2.l4) as l4, | |
IF( t2.l5 IS NULL, 0, t2.l5) as l5, | |
IF( t2.l6 IS NULL, 0, t2.l6) as l6, | |
IF( t2.l7 IS NULL, 0, t2.l7) as l7, | |
IF( t2.l8 IS NULL, 0, t2.l8) as l8, | |
IF( t2.l9 IS NULL, 0, t2.l9) as l9, | |
IF( t2.l10 IS NULL, 0, t2.l10) as l10, | |
IF( t2.l11 IS NULL, 0, t2.l11) as l11, | |
IF( t2.l12 IS NULL, 0, t2.l12) as l12, | |
IF( t2.l13 IS NULL, 0, t2.l13) as l13 | |
from | |
test_ordered t2 | |
), quantative as ( | |
select rowid, 16777217 + 1 as feature, l1 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 2 as feature, l2 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 3 as feature, l3 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 4 as feature, l4 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 5 as feature, l5 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 6 as feature, l6 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 7 as feature, l7 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 8 as feature, l8 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 9 as feature, l9 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 10 as feature, l10 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 11 as feature, l11 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 12 as feature, l12 as value from test_nonzero | |
union all | |
select rowid, 16777217 + 13 as feature, l13 as value from test_nonzero | |
), | |
quantative_stats as ( | |
select | |
feature, | |
avg(value) as mean, stddev_pop(value) as stddev, | |
min(value) as min, max(value) as max | |
from | |
quantative | |
group by | |
feature | |
),quantative_norm as ( | |
select | |
t1.rowid, | |
collect_list( | |
feature( | |
t1.feature, rescale(t1.value, t2.min, t2.max) | |
) | |
) as features | |
from | |
quantative t1 | |
JOIN quantative_stats t2 ON (t1.feature = t2.feature) | |
group by | |
t1.rowid | |
)INSERT OVERWRITE TABLE test_quantative | |
SELECT | |
rowid, features | |
FROM | |
quantative_norm t1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH train_nonzero as ( | |
select | |
t2.rowid as rowid, | |
IF( t2.l1 IS NULL, 0, t2.l1) as l1, | |
IF( t2.l2 IS NULL, 0, t2.l2) as l2, | |
IF( t2.l3 IS NULL, 0, t2.l3) as l3, | |
IF( t2.l4 IS NULL, 0, t2.l4) as l4, | |
IF( t2.l5 IS NULL, 0, t2.l5) as l5, | |
IF( t2.l6 IS NULL, 0, t2.l6) as l6, | |
IF( t2.l7 IS NULL, 0, t2.l7) as l7, | |
IF( t2.l8 IS NULL, 0, t2.l8) as l8, | |
IF( t2.l9 IS NULL, 0, t2.l9) as l9, | |
IF( t2.l10 IS NULL, 0, t2.l10) as l10, | |
IF( t2.l11 IS NULL, 0, t2.l11) as l11, | |
IF( t2.l12 IS NULL, 0, t2.l12) as l12, | |
IF( t2.l13 IS NULL, 0, t2.l13) as l13 | |
from | |
train_ordered t2 | |
), quantative as ( | |
select rowid, 16777217 + 1 as feature, l1 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 2 as feature, l2 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 3 as feature, l3 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 4 as feature, l4 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 5 as feature, l5 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 6 as feature, l6 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 7 as feature, l7 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 8 as feature, l8 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 9 as feature, l9 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 10 as feature, l10 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 11 as feature, l11 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 12 as feature, l12 as value from train_nonzero | |
union all | |
select rowid, 16777217 + 13 as feature, l13 as value from train_nonzero | |
), | |
quantative_stats as ( | |
select | |
feature, | |
avg(value) as mean, stddev_pop(value) as stddev, | |
min(value) as min, max(value) as max | |
from | |
quantative | |
group by | |
feature | |
),quantative_norm as ( | |
select | |
t1.rowid, | |
collect_list( | |
feature( | |
t1.feature, rescale(t1.value, t2.min, t2.max) | |
) | |
) as features | |
from | |
quantative t1 | |
JOIN quantative_stats t2 ON (t1.feature = t2.feature) | |
group by | |
t1.rowid | |
)INSERT OVERWRITE TABLE train_quantative | |
SELECT | |
rowid, features | |
FROM | |
quantative_norm t1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment