Skip to content

Instantly share code, notes, and snippets.

@puyokw
puyokw / DataScienceCup.r
Last active August 29, 2015 14:18
データサイエンス・カップ 2015 春
# Missing-value imputation
# Load the training data (read from output.csv)
data1 <- read.csv("output.csv")
# The ID, humidity, attendance and match-round columns are excluded:
#   - ID: the IDs of the records to be predicted lie far away from these
#   - humidity and attendance: they contain missing values
#   - match round: carries almost the same information as the month
temp <- data1[, -c(1, 2, 5, 16)]
# Same exclusions plus column 15, which is kept separately as temp.y
# (presumably the response variable -- confirm against output.csv)
temp.x <- data1[, -c(1, 2, 5, 15, 16)]
temp.y <- data1[[15]]
@puyokw
puyokw / test_join.sql
Created February 24, 2016 10:28
td_intern rossmann
INSERT OVERWRITE TABLE testing2
SELECT
rowid() as rowid, t1.id, t1.stateholiday, t1.store, t1.promo, t1.dayofweek,
t1.date, t1.schoolholiday, t2.promo2sinceweek, t2.competitionopensinceyear,
t2.assortment, t2.promo2sinceyear, t2.competitiondistance, t2.promointerval, t2.promo2,
t2.storetype, t2.competitionopensincemonth,
SUBSTR(t1.date,1,4) as year,
SUBSTR(t1.date,6,2) as month,
SUBSTR(t1.date,9,2) as day
FROM
@puyokw
puyokw / prediction.sql
Created February 24, 2016 10:30
td_intern rossmann
INSERT OVERWRITE TABLE prediction
SELECT
rowid,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
@puyokw
puyokw / submission.sql
Created February 24, 2016 10:31
td_intern rossmann
-- Build the submission result set: each prediction row is matched to its
-- testing2 row on rowid, exposing the testing2 id and the predicted value
-- (renamed to sales), sorted by id.
SELECT
  tst.id AS id,
  pred.predicted AS sales
FROM
  prediction pred
  INNER JOIN testing2 tst
    ON pred.rowid = tst.rowid
ORDER BY
  id;
@puyokw
puyokw / var_imp.py
Created February 25, 2016 03:59
td_intern rossmann
%matplotlib inline
import os
import pandas as pd
import pandas_td as td
import matplotlib.pyplot as plt
# Open a Treasure Data connection. The API key is read from the TD_API_KEY
# environment variable (os.environ[...] raises KeyError if it is not set).
con = td.connect(apikey=os.environ['TD_API_KEY'],endpoint='https://api.treasuredata.com/')
# Query engine for the 'rossmann' database, using the 'presto' engine type.
engine = con.query_engine(database='rossmann', type='presto')
# Read the 'var_importance' table through that engine.
# NOTE(review): presumably returns a pandas DataFrame -- confirm against the pandas_td docs.
var_imp=td.read_td_table('var_importance', engine)
@puyokw
puyokw / train_join.sql
Last active February 25, 2016 07:42
td_intern rossmann
WITH train_opened as (
SELECT
rowid() as rowid,
t.stateholiday, t.store, t.promo, t.dayofweek,
t.date, t.schoolholiday, t.sales,
SUBSTR(t.date,1,4) as year,
SUBSTR(t.date,6,2) as month,
SUBSTR(t.date,9,2) as day
FROM
train_original t
@puyokw
puyokw / make_model.sql
Last active February 26, 2016 06:34
td_intern rossmann
INSERT
OVERWRITE TABLE
model SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
training3
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
@puyokw
puyokw / train_quantfy.sql
Last active March 8, 2016 04:39
td_intern rossmann
WITH train_ordered as (
select * from training2
order by rowid asc
),
train_quantified as (
select
t0.rowid,
t2.*
from
train_ordered t0
@puyokw
puyokw / test_quantify.sql
Last active March 8, 2016 04:39
td_intern rossmann
WITH train_test as (
select
1 as train_first, false as output_row, rowid, stateholiday, store, promo, dayofweek, date, schoolholiday, promo2sinceweek,
competitionopensinceyear, assortment, promo2sinceyear, competitiondistance, promointerval, promo2,
storetype, competitionopensincemonth, year, month, day
from
training2
union all
select
2 as train_first, true as output_row, rowid, stateholiday, store, promo, dayofweek, date, schoolholiday, promo2sinceweek,
@puyokw
puyokw / model.sql
Last active March 8, 2016 05:05
td_intern rossmann
INSERT
OVERWRITE TABLE
model SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
training3
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM