Skip to content

Instantly share code, notes, and snippets.

@orcaman
Created June 26, 2019 20:22
Show Gist options
  • Save orcaman/dba92235145675ba80af5adb71f7d14c to your computer and use it in GitHub Desktop.
Save orcaman/dba92235145675ba80af5adb71f7d14c to your computer and use it in GitHub Desktop.
all
WITH
Distances AS (
SELECT
DISTINCT ML.CENTROID_ID,
sepal_length,
sepal_width,
petal_length,
petal_width,
species,
MIN(NEAREST_CENTROIDS_DISTANCE.DISTANCE) AS distance_from_closest_centroid
FROM
ML.PREDICT(MODEL public.iris_clusters,
(
SELECT
DISTINCT sepal_length,
sepal_width,
petal_length,
petal_width,
species
FROM
`public.iris` )) AS ML
CROSS JOIN
UNNEST(NEAREST_CENTROIDS_DISTANCE) AS NEAREST_CENTROIDS_DISTANCE
GROUP BY
ML.CENTROID_ID,
sepal_length,
sepal_width,
petal_length,
petal_width,
species ),
Threshold AS (
SELECT
ROUND(APPROX_QUANTILES(distance_from_closest_centroid,10000)[
OFFSET
(9500)],2) AS threshold
FROM
Distances)
SELECT
d.*
FROM
Distances d
JOIN
Threshold
ON
d.distance_from_closest_centroid > Threshold.threshold
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment