create table model
as
select
feature,
voted_avg(weight) as weight
from
(select
-- hinge loss by the default
train_classifier(add_bias(features),label,'-iter 20') as (feature,weight)
-- logistic loss
-- train_classifier(add_bias(features),label,'-iter 20 -loss logistic') as (feature,weight)
from
train
) t
group by feature;
create or replace view predict
as
WITH test_exploded as (
select
rowid,
label,
extract_feature(feature) as feature,
extract_weight(feature) as value
from
test
LATERAL VIEW explode(add_bias(features)) t AS feature
)
select
t.rowid,
sum(m.weight * t.value) as score,
if(sum(m.weight * t.value) > 0, 1, -1) as label
-- for logistic loss
-- sigmoid(sum(m.weight * t.value)) as prob,
-- if(sigmoid(sum(m.weight * t.value)) >= 0.5, 1, -1) as label
from
test_exploded t
LEFT OUTER JOIN model m ON (t.feature = m.feature)
group by
t.rowid
;
create or replace view submit
as
select
t.label as actual,
p.label as predicted,
p.score
from
test t
JOIN predict p on (t.rowid = p.rowid);
select count(1) / 4996 from submit
where actual == predicted;
0.8895116092874299