Skip to content

Instantly share code, notes, and snippets.

@myui
Created June 27, 2019 10:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save myui/f943fa3ce1a7e1ac3f2dd9a7f9fa703b to your computer and use it in GitHub Desktop.
Save myui/f943fa3ce1a7e1ac3f2dd9a7f9fa703b to your computer and use it in GitHub Desktop.
CREATE TABLE users (
rowid int, name string, age int, gender string
);
INSERT INTO users VALUES
(1, 'Jacob', 20, 'Male'),
(2, 'Mason', 22, 'Male'),
(3, 'Sophia', 35, 'Female'),
(4, 'Ethan', 55, 'Male'),
(5, 'Emma', 15, 'Female'),
(6, 'Noah', 46, 'Male'),
(7, 'Isabella', 20, 'Female')
;
CREATE TABLE input as
SELECT
rowid,
array_concat(
categorical_features(
array('name', 'gender'),
name, gender
),
quantitative_features(
array('age'),
age
)
) AS features
FROM
users;
WITH extracted as (
select
extract_feature(feature) as index,
extract_weight(feature) as value
from
input l
LATERAL VIEW explode(features) r as feature
),
mapping as (
select
index,
build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking
from
extracted
group by
index
),
bins as (
select
to_map(index, quantiles) as quantiles
from
mapping
)
select
l.features as original,
feature_binning(l.features, r.quantiles) as features
from
input l
cross join bins r
limit 10;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment