library(tidyverse)
x <- seq(0, 10, length.out = 100)
unobserved <- tibble(
x = x,
true_curve1 = sin(x),
true_curve2 = tanh(x) - 0.5,
coin = as.logical(rbinom(length(x), size = 1, prob = 0.5)),
y1 = ifelse(coin, true_curve1, true_curve2),
y2 = ifelse(coin, true_curve2, true_curve1)
)
observed <- unobserved |>
pivot_longer(
c("y1", "y2"),
values_to = "y"
) |>
select(x, y)
observed |>
ggplot(aes(x, y)) +
geom_point() +
labs(
title = "How to identify which of the two curves each point belongs to?",
subtitle = "100 data points have been generated from two smooth curves, but we only see (x, y) pairs"
) +
theme_minimal()
unobserved |>
pivot_longer(
contains("true_curve"),
values_to = "y"
) |>
ggplot(aes(x, y, color = name)) +
geom_line() +
geom_point() +
scale_color_viridis_d(begin = 0.2, end = 0.8) +
labs(
color = "",
title = "Desired solution"
) +
theme_minimal()
##### k-means doesn't cut it ---------------------------------------------------
clus <- kmeans(observed, centers = 2)
observed |>
ggplot(aes(x, y, color = as.factor(clus$cluster))) +
geom_point() +
labs(
title = "Attempted solution using k-means"
) +
theme_void()
##### latent class regression?? ------------------------------------------------
library(flexmix)
#> Loading required package: lattice
set.seed(27)
model <- flexmix(
y ~ s(x),
data = observed,
model = FLXMRmgcv(),
k = 2
)
observed |>
ggplot(aes(x, y, color = as.factor(clusters(model)))) +
geom_point() +
labs(
title = "Attempted solution using a mixture of GAMs"
) +
theme_void()
Created on 2022-06-28 by the reprex package (v2.0.1)