Skip to content

Instantly share code, notes, and snippets.

@myui
Created August 24, 2018 09:47
Show Gist options
  • Save myui/a33a06ff3cf7db0e63ba46ec29703e43 to your computer and use it in GitHub Desktop.
Save myui/a33a06ff3cf7db0e63ba46ec29703e43 to your computer and use it in GitHub Desktop.
create table model 
as
select 
 feature,
 voted_avg(weight) as weight
from 
 (select 
     -- hinge loss by the default
     train_classifier(add_bias(features),label,'-iter 20') as (feature,weight)
     -- logistic loss
     -- train_classifier(add_bias(features),label,'-iter 20 -loss logistic') as (feature,weight)
  from 
     train
 ) t 
group by feature;


create or replace view predict
as
WITH test_exploded as (
  select 
    rowid,
    label,
    extract_feature(feature) as feature,
    extract_weight(feature) as value
  from
    test
    LATERAL VIEW explode(add_bias(features)) t AS feature
)
select
  t.rowid,
  sum(m.weight * t.value) as score,
  if(sum(m.weight * t.value) > 0, 1, -1) as label  
  -- for logistic loss 
  -- sigmoid(sum(m.weight * t.value)) as prob,
  -- if(sigmoid(sum(m.weight * t.value)) >= 0.5, 1, -1) as label
from 
  test_exploded t
  LEFT OUTER JOIN model m ON (t.feature = m.feature)
group by
  t.rowid
;

create or replace view submit 
as
select 
  t.label as actual, 
  p.label as predicted, 
  p.score
from 
  test t
  JOIN predict p on (t.rowid = p.rowid);

select count(1) / 4996 from submit 
where actual == predicted;

0.8895116092874299

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment