Skip to content

Instantly share code, notes, and snippets.

@puyokw
puyokw / minmax.sql
Last active March 11, 2016 12:24
td_intern criteo
--presto
DROP TABLE IF EXISTS minmax;
CREATE TABLE minmax as
WITH t1 as (
select
min(l1) as min_l1,max(l1) as max_l1,min(l2) as min_l2,max(l2) as max_l2,min(l3) as min_l3,max(l3) as max_l3,min(l4) as min_l4,max(l4) as max_l4,min(l5) as min_l5,max(l5) as max_l5,min(l6) as min_l6,max(l6) as max_l6,min(l7) as min_l7,max(l7) as max_l7,min(l8) as min_l8,max(l8) as max_l8,min(l9) as min_l9,max(l9) as max_l9,min(l10) as min_l10,max(l10) as max_l10,min(l11) as min_l11,max(l11) as max_l11,min(l12) as min_l12,max(l12) as max_l12,min(l13) as min_l13,max(l13) as max_l13
from
train_ordered
union all
select
@puyokw
puyokw / fm_test_exploded.sql
Created March 11, 2016 06:08
td_intern criteo
INSERT OVERWRITE TABLE fm_test_exploded
select
rowid,
extract_feature(fv) as feature,
extract_weight(fv) as Xi
from
fm_test t1 LATERAL VIEW explode(features) t2 as fv;
@puyokw
puyokw / var_imp.sql
Created March 10, 2016 09:22
td_intern otto
INSERT OVERWRITE TABLE var_importance
select
array_sum(var_importance) as var_importance,
sum(oob_errors) / sum(oob_tests) as oob_err_rate
from
model;
@puyokw
puyokw / fm_test_int.sql
Created March 10, 2016 07:08
td_intern criteo
INSERT OVERWRITE TABLE fm_test
select
t2.rowid,
concat_array(
array_remove(array(
if(t2.c1 is null, null, cast( mhash(concat("c1:", t2.c1)) as string) ),
if(t2.c2 is null, null, cast( mhash(concat("c2:", t2.c2)) as string) ),
if(t2.c3 is null, null, cast( mhash(concat("c3:", t2.c3)) as string) ),
if(t2.c4 is null, null, cast( mhash(concat("c4:", t2.c4)) as string) ),
if(t2.c5 is null, null, cast( mhash(concat("c5:", t2.c5)) as string) ),
@puyokw
puyokw / mlogloss.sql
Created March 9, 2016 12:09
td_intern otto
-- 正しいラベルを挿入
INSERT OVERWRITE TABLE cv1
SELECT t1.*, t2.label as actual
FROM pred_cv t1
LEFT OUTER JOIN test_cv t2 ON (t1.rowid=t2.rowid);
-- mlogloss を計算
SELECT
SUM(LN(IF(actual = 0,
probabilities[0],
@puyokw
puyokw / pred_cv.sql
Last active March 11, 2016 08:04
td_intern otto
-- トレーニング
INSERT OVERWRITE TABLE model_cv
SELECT train_randomforest_classifier(features, label, '-trees 500')
FROM train_cv;
-- 予測
INSERT OVERWRITE TABLE pred_cv
SELECT
t2.rowid as rowid,
t2.predicted.label as label,
@puyokw
puyokw / cv1_dataset.sql
Created March 9, 2016 12:06
td_intern otto
-- トレーニングデータに乱数を入れる
INSERT OVERWRITE TABLE training_with_rnd
select *, rand(1) as rnd
from training;
-- rnd が0.3 以上のものをトレーニングデータ
INSERT OVERWRITE TABLE train_cv
select *
from training_with_rnd where rnd >= 0.3;
@puyokw
puyokw / var_imp_otto.ipynb
Last active March 10, 2016 03:48
td_intern otto
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@puyokw
puyokw / otto_submission.ipynb
Created March 9, 2016 11:18
td_intern otto
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@puyokw
puyokw / submission.sql
Created March 9, 2016 11:15
td_intern otto
INSERT OVERWRITE TABLE submission
SELECT
t2.id as id,
IF(t1.probabilities[0] is null, 0, t1.probabilities[0]) as Class_1,
IF(t1.probabilities[1] is null, 0, t1.probabilities[1]) as Class_2,
IF(t1.probabilities[2] is null, 0, t1.probabilities[2]) as Class_3,
IF(t1.probabilities[3] is null, 0, t1.probabilities[3]) as Class_4,
IF(t1.probabilities[4] is null, 0, t1.probabilities[4]) as Class_5,
IF(t1.probabilities[5] is null, 0, t1.probabilities[5]) as Class_6,
IF(t1.probabilities[6] is null, 0, t1.probabilities[6]) as Class_7,