Skip to content

Instantly share code, notes, and snippets.

@puyokw
puyokw / prediction.sql
Created March 9, 2016 11:14
td_intern otto
INSERT OVERWRITE TABLE prediction
SELECT
t2.rowid as rowid,
t2.predicted.label as label,
t2.predicted.probability as probability,
t2.predicted.probabilities as probabilities
FROM(
SELECT
rowid AS rowid,
rf_ensemble(predicted) AS predicted
@puyokw
puyokw / model.sql
Created March 9, 2016 10:51
td_intern otto
INSERT OVERWRITE TABLE model
SELECT
train_randomforest_classifier(features, label, '-trees 500')
FROM
training;
@puyokw
puyokw / test.sql
Created March 9, 2016 10:50
td_intern otto
INSERT OVERWRITE TABLE testing
SELECT
t1.id AS id,
rowid() as rowid,
ARRAY(t1.feat_1,t1.feat_2,t1.feat_3,t1.feat_4,t1.feat_5,t1.feat_6,t1.feat_7,t1.feat_8,t1.feat_9,t1.feat_10
,t1.feat_11,t1.feat_12,t1.feat_13,t1.feat_14,t1.feat_15,t1.feat_16,t1.feat_17,t1.feat_18,t1.feat_19,t1.feat_20
,t1.feat_21,t1.feat_22,t1.feat_23,t1.feat_24,t1.feat_25,t1.feat_26,t1.feat_27,t1.feat_28,t1.feat_29,t1.feat_30
,t1.feat_31,t1.feat_32,t1.feat_33,t1.feat_34,t1.feat_35,t1.feat_36,t1.feat_37,t1.feat_38,t1.feat_39,t1.feat_40
,t1.feat_41,t1.feat_42,t1.feat_43,t1.feat_44,t1.feat_45,t1.feat_46,t1.feat_47,t1.feat_48,t1.feat_49,t1.feat_50
,t1.feat_51,t1.feat_52,t1.feat_53,t1.feat_54,t1.feat_55,t1.feat_56,t1.feat_57,t1.feat_58,t1.feat_59,t1.feat_60
@puyokw
puyokw / train.sql
Created March 9, 2016 10:49
td_intern otto
INSERT OVERWRITE TABLE training
SELECT
t1.id AS id,
target,
cast(regexp_replace(target,'Class_','') as int)-1 as label, --0base
rowid() as rowid,
ARRAY(t1.feat_1,t1.feat_2,t1.feat_3,t1.feat_4,t1.feat_5,t1.feat_6,t1.feat_7,t1.feat_8,t1.feat_9,t1.feat_10
,t1.feat_11,t1.feat_12,t1.feat_13,t1.feat_14,t1.feat_15,t1.feat_16,t1.feat_17,t1.feat_18,t1.feat_19,t1.feat_20
,t1.feat_21,t1.feat_22,t1.feat_23,t1.feat_24,t1.feat_25,t1.feat_26,t1.feat_27,t1.feat_28,t1.feat_29,t1.feat_30
,t1.feat_31,t1.feat_32,t1.feat_33,t1.feat_34,t1.feat_35,t1.feat_36,t1.feat_37,t1.feat_38,t1.feat_39,t1.feat_40
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@puyokw
puyokw / logloss.sql
Created March 8, 2016 06:22
td_intern sql
SELECT
-sum(label*LN(predicted)+(1-label)*LN(1-predicted))/count(1) as logloss
FROM cv1;
@puyokw
puyokw / cv1_pred.sql
Last active March 11, 2016 07:51
td_intern criteo
WITH cv1_test AS(
SELECT *
FROM
train_cv
WHERE rnd <=0.3
), cv1_test_exploded AS(
select
label,
rowid,
extract_feature(fv) as feature,
@puyokw
puyokw / cv1_model.sql
Last active March 13, 2016 11:40
td_intern criteo
WITH cv1_train AS(
SELECT *
FROM
train_cv where rnd > 0.3
)INSERT OVERWRITE TABLE cv1_model
select
feature,
avg(Wi) as Wi,
array_avg(Vif) as Vif
from (
@puyokw
puyokw / train_cv.sql
Created March 8, 2016 06:16
td_intern criteo
INSERT OVERWRITE TABLE train_cv
select
rowid, features, label, rand(2016) as rnd
from
fm_train
cluster by rand(229);
@puyokw
puyokw / fm_pred.sql
Last active March 11, 2016 06:06
td_intern criteo
WITH fm_pred as(
select
t1.rowid,
sigmoid( fm_predict(p1.Wi, p1.Vif, t1.Xi) ) as predicted
from
fm_test_exploded t1
LEFT OUTER JOIN fm_model p1 ON (t1.feature = p1.feature)
group by
t1.rowid
) INSERT OVERWRITE TABLE submission