Skip to content

Instantly share code, notes, and snippets.

@puyokw
puyokw / sales_store1.py
Last active November 21, 2017 07:20
td_intern rossmann
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates
import datetime
import pandas_td as td
import os
@puyokw
puyokw / randomForest.r
Last active June 3, 2016 18:03
introduction to xgboost
# Bonus: the random forest code that was used for the comparison.
# Splits the iris data frame into two halves by row position, then fits a
# random forest on the training half using randomForest::tuneRF.
# Odd row indices 1, 3, ..., 149.
odd.n<-2*(1:75)-1
iris.train<-iris[odd.n,] # odd-numbered rows are the training data
iris.test<-iris[-odd.n,] # the remaining (even-numbered) rows are the validation data
# randomForest
library(randomForest)
# Fix the RNG seed so the tuning/fit below is reproducible.
set.seed(131)
# Columns 1-4 are used as predictors; column 5 is the class label (as a factor).
train.x<-iris.train[,1:4]
train.y<-as.factor(iris.train[,5])
# NOTE(review): per the randomForest docs, tuneRF searches over mtry and, with
# doBest=T, returns a forest fitted with the best mtry found -- confirm.
model.rf<-tuneRF(train.x,train.y,doBest=T)
@puyokw
puyokw / cv1_pred.sql
Last active May 5, 2016 02:01
td_intern rossmann
WITH test_cv1 as(
select
*
from
train_cv where gid=1
) INSERT OVERWRITE TABLE cv1
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
@puyokw
puyokw / cv1_model.sql
Last active March 13, 2016 11:40
td_intern criteo
WITH cv1_train AS(
SELECT *
FROM
train_cv where rnd > 0.3
)INSERT OVERWRITE TABLE cv1_model
select
feature,
avg(Wi) as Wi,
array_avg(Vif) as Vif
from (
@puyokw
puyokw / fm_model.sql
Last active March 13, 2016 11:37
td_intern criteo
INSERT OVERWRITE TABLE fm_model
select
feature,
avg(Wi) as Wi,
array_avg(Vif) as Vif
from (
select
train_fm(features, label, "-c -factor 0 -iters 50 -eta 0.01 -int_feature")
as (feature, Wi, Vif)
from
@puyokw
puyokw / minmax.sql
Last active March 11, 2016 12:24
td_intern criteo
--presto
DROP TABLE IF EXISTS minmax;
CREATE TABLE minmax as
WITH t1 as (
select
min(l1) as min_l1,max(l1) as max_l1,min(l2) as min_l2,max(l2) as max_l2,min(l3) as min_l3,max(l3) as max_l3,min(l4) as min_l4,max(l4) as max_l4,min(l5) as min_l5,max(l5) as max_l5,min(l6) as min_l6,max(l6) as max_l6,min(l7) as min_l7,max(l7) as max_l7,min(l8) as min_l8,max(l8) as max_l8,min(l9) as min_l9,max(l9) as max_l9,min(l10) as min_l10,max(l10) as max_l10,min(l11) as min_l11,max(l11) as max_l11,min(l12) as min_l12,max(l12) as max_l12,min(l13) as min_l13,max(l13) as max_l13
from
train_ordered
union all
select
@puyokw
puyokw / test_quantative.sql
Last active March 11, 2016 12:07
td_intern criteo
-- @TD autoconvertjoin: true
INSERT OVERWRITE TABLE test_quantative
SELECT
rowid,
array_remove(array(
if(l1 is null, null, feature(16777217 + 1, rescale(l1, min_l1, max_l1))),
if(l2 is null, null, feature(16777217 + 2, rescale(l2, min_l2, max_l2))),
if(l3 is null, null, feature(16777217 + 3, rescale(l3, min_l3, max_l3))),
if(l4 is null, null, feature(16777217 + 4, rescale(l4, min_l4, max_l4))),
if(l5 is null, null, feature(16777217 + 5, rescale(l5, min_l5, max_l5))),
@puyokw
puyokw / train_quntative.sql
Last active March 11, 2016 12:05
td_intern criteo
-- @TD autoconvertjoin: true
INSERT OVERWRITE TABLE train_quantative
SELECT
rowid,
array_remove(array(
if(l1 is null, null, feature(16777217 + 1, rescale(l1, min_l1, max_l1))),
if(l2 is null, null, feature(16777217 + 2, rescale(l2, min_l2, max_l2))),
if(l3 is null, null, feature(16777217 + 3, rescale(l3, min_l3, max_l3))),
if(l4 is null, null, feature(16777217 + 4, rescale(l4, min_l4, max_l4))),
if(l5 is null, null, feature(16777217 + 5, rescale(l5, min_l5, max_l5))),
@puyokw
puyokw / pred_cv.sql
Last active March 11, 2016 08:04
td_intern otto
-- Training
-- Overwrites table model_cv with the output of train_randomforest_classifier
-- applied to the features and label columns of train_cv.
-- NOTE(review): '-trees 500' presumably requests 500 trees -- confirm against
-- the Hivemall documentation for this function.
INSERT OVERWRITE TABLE model_cv
SELECT train_randomforest_classifier(features, label, '-trees 500')
FROM train_cv;
-- 予測
INSERT OVERWRITE TABLE pred_cv
SELECT
t2.rowid as rowid,
t2.predicted.label as label,
@puyokw
puyokw / cv1_pred.sql
Last active March 11, 2016 07:51
td_intern criteo
WITH cv1_test AS(
SELECT *
FROM
train_cv
WHERE rnd <=0.3
), cv1_test_exploded AS(
select
label,
rowid,
extract_feature(fv) as feature,