Skip to content

Instantly share code, notes, and snippets.

@franzbischoff
Last active April 25, 2022 20:36
Show Gist options
  • Save franzbischoff/0d8a5a01a0451ddf866dc2f6bfe1d30f to your computer and use it in GitHub Desktop.
Save franzbischoff/0d8a5a01a0451ddf866dc2f6bfe1d30f to your computer and use it in GitHub Desktop.
Experiment of FLUSS scores
# Create the GT and predictions
set.seed(2022)
ts_length <- 1000
n_points <- 10
truth <- sort(round(runif(n_points, 1, ts_length), 0))
min_diff <- min(abs(diff(truth)))
mask <- runif(n_points) > 0.5
reported_equal <- truth + round(runif(n_points, -min_diff / 2, min_diff / 2), 0)
reported_half <- reported_equal[mask]
reported_half_rand <- sort(round(runif(floor(n_points / 2), 1, ts_length), 0))
reported_dbl <- unique(sort(c(reported_equal, round(runif(n_points, 1, ts_length), 0))))
reported_dbl_rand <- sort(round(runif(floor(n_points * 2), 1, ts_length), 0))
reported_one <- reported_equal[floor(n_points / 2)]
reported_all <- seq.int(1, ts_length, 1)
score_regimes1 <- function(gtruth, reported, data_size) {
n <- length(gtruth)
m <- length(reported)
minv <- rep(Inf, n)
for (j in 1:n) {
for (i in 1:m) {
if (abs(reported[i] - gtruth[j]) < abs(minv[j])) {
minv[j] <- abs(reported[i] - gtruth[j])
}
}
}
o <- max(n, m)
h <- min(n, m)
score <- sum(minv) / data_size
score1 <- sum(minv) / m / data_size
score2 <- sum(minv) * m / data_size
score_max1 <- sum(minv) / o / data_size
score_max2 <- sum(minv) * o / data_size
score_min1 <- sum(minv) / h / data_size
score_min2 <- sum(minv) * h / data_size
return(round(c(
score = score,
score1 = score1,
score2 = score2,
score_max1 = score_max1,
score_max2 = score_max2,
score_min1 = score_min1,
score_min2 = score_min2
), 4))
}
score_regimes2 <- function(gtruth, reported, data_size) {
m <- length(gtruth)
n <- length(reported)
out <- max(m, n)
inn <- min(m, n)
if (out == m) {
outer <- sort(gtruth)
inner <- sort(reported)
} else {
outer <- sort(reported)
inner <- sort(gtruth)
}
minv <- rep(Inf, out)
for (j in seq.int(1, out)) {
for (i in seq.int(1, inn)) {
if (abs(inner[i] - outer[j]) < abs(minv[j])) {
minv[j] <- abs(inner[i] - outer[j])
}
}
}
score <- sum(minv) / data_size
score1 <- sum(minv) / n / data_size
score2 <- sum(minv) * n / data_size
score_max1 <- sum(minv) / out / data_size
score_max2 <- sum(minv) * out / data_size
score_min1 <- sum(minv) / inn / data_size
score_min2 <- sum(minv) * inn / data_size
return(round(c(
nscore = score,
nscore1 = score1,
nscore2 = score2,
nscore_max1 = score_max1,
nscore_max2 = score_max2,
nscore_min1 = score_min1,
nscore_min2 = score_min2
), 4))
}
score_regimes <- score_regimes1
tr <- score_regimes(truth, truth, ts_length)
eq <- score_regimes(truth, reported_equal, ts_length)
hf <- score_regimes(truth, reported_half, ts_length)
hfr <- score_regimes(truth, reported_half_rand, ts_length)
db <- score_regimes(truth, reported_dbl, ts_length)
dbr <- score_regimes(truth, reported_dbl_rand, ts_length)
on <- score_regimes(truth, reported_one, ts_length)
al <- score_regimes(truth, reported_all, ts_length)
scores <- data.frame(truth = tr, equal = eq, dbl = db, half = hf, one = on, dbl_r = dbr, half_r = hfr, all = al)
score_regimes <- score_regimes2
tr <- score_regimes(truth, truth, ts_length)
eq <- score_regimes(truth, reported_equal, ts_length)
hf <- score_regimes(truth, reported_half, ts_length)
hfr <- score_regimes(truth, reported_half_rand, ts_length)
db <- score_regimes(truth, reported_dbl, ts_length)
dbr <- score_regimes(truth, reported_dbl_rand, ts_length)
on <- score_regimes(truth, reported_one, ts_length)
al <- score_regimes(truth, reported_all, ts_length)
scores2 <- data.frame(truth = tr, equal = eq, dbl = db, half = hf, one = on, dbl_r = dbr, half_r = hfr, all = al)
scores <- rbind(scores, scores2)
print(scores)
# Possible best algorithm:
score_regimes3 <- function(gtruth, reported, data_size) {
m <- length(gtruth)
n <- length(reported)
out <- max(m, n)
inn <- min(m, n)
if (out == m) {
outer <- sort(gtruth)
inner <- sort(reported)
} else {
outer <- sort(reported)
inner <- sort(gtruth)
}
minv <- rep(Inf, out)
k <- 1
for (j in seq.int(1, out)) {
for (i in seq.int(k, inn)) {
if (abs(inner[i] - outer[j]) <= minv[j]) {
minv[j] <- abs(inner[i] - outer[j])
k <- i # pruning, truth and reported must be sorted
} else {
break # pruning, truth and reported must be sorted
}
}
}
# score <- sum(minv) / data_size
# score <- sum(minv) / n / data_size
# score <- sum(minv) * n / data_size
# score <- sum(minv) / out / data_size
# score <- sum(minv) * out / data_size # *
score <- sum(minv) / inn / data_size # *
# max = sum(abs(seq.int(1, out)-1000))/1000
return(score)
}
if (FALSE) {
library(plotly)
set.seed(2022)
ts_length <- 2000
n_points <- 100
truth <- sort(round(runif(n_points, 1, ts_length), 0))
score_mean <- NULL
score_sd <- NULL
x <- c(seq.int(1, floor(n_points * 2)), seq.int(floor(n_points * 2) + 1, ts_length, by = 10))
for (i in x) {
result <- NULL
cli::cli_inform("Size {i}.")
set.seed(2022)
for (j in seq.int(1, ceiling(n_points / (log2(i) + 1)))) {
# cli::cli_inform("Sample {j}.")
guess <- sort(round(runif(i, 1, ts_length), 0))
res <- score_regimes3(truth, guess, ts_length)
result <- c(result, res)
}
score_mean <- c(score_mean, mean(result))
score_sd <- c(score_sd, sd(result))
}
plot_ly(x = x, y = score_mean) # / n_points / ts_length
}
if (FALSE) {
library(plotly)
p <- subplot(
plot_ly() %>%
add_segments(
x = truth, y = 2,
xend = reported_equal, yend = 1,
color = I("#dddddd"), showlegend = FALSE
) %>%
add_markers(
x = truth, y = 2,
color = I("blue"),
name = "truth"
) %>%
add_markers(
x = reported_equal, y = 1,
color = I("red"), name = "reported_equal"
),
plot_ly() %>%
add_segments(
x = truth, y = 2,
xend = reported_equal, yend = 1,
color = I("#dddddd"), showlegend = FALSE
) %>%
add_markers(
x = truth, y = 2,
color = I("blue"), showlegend = FALSE
) %>%
add_markers(
x = reported_half, y = 1,
color = I("green"), name = "reported_half"
),
plot_ly() %>%
add_segments(
x = truth, y = 2,
xend = reported_equal, yend = 1,
color = I("#dddddd"), showlegend = FALSE
) %>%
add_markers(
x = truth, y = 2,
color = I("blue"), showlegend = FALSE
) %>%
add_markers(
x = reported_dbl, y = 1,
color = I("#dceb08"), name = "reported_dbl"
),
plot_ly() %>%
add_segments(
x = truth, y = 2,
xend = reported_equal, yend = 1,
color = I("#dddddd"), showlegend = FALSE
) %>%
add_markers(
x = truth, y = 2,
color = I("blue"), showlegend = FALSE
) %>%
add_markers(
x = reported_one, y = 1,
color = I("#e408eb"), name = "reported_one"
),
shareX = TRUE, nrows = 4
)
print(p)
}
##### Proposed algorithm
# 1. Check which is larger, the gtruth or the predicted regimes.
# 2. Sort both indexes for allowin pruning.
# 3. The larger one is the outer loop.
# 4. The smaller one is the inner loop.
# 5. For each outer loop index, find the closest inner loop index.
# 6. If the distance is smaller than the current minimum, update the minimum.
# 7. The score is the sum of the minimums divided by the data_size multiplied by the size of the inner loop.
score_regimes_final <- function(gtruth, predicted, data_size) {
m <- length(gtruth)
n <- length(predicted)
out <- max(m, n)
inn <- min(m, n)
if (out == m) {
outer <- sort(gtruth)
inner <- sort(predicted)
} else {
outer <- sort(predicted)
inner <- sort(gtruth)
}
minv <- rep(Inf, out)
k <- 1
for (j in seq.int(1, out)) {
for (i in seq.int(k, inn)) {
if (abs(inner[i] - outer[j]) <= minv[j]) {
minv[j] <- abs(inner[i] - outer[j])
k <- i # pruning, truth and predicted must be sorted
} else {
break # pruning, truth and predicted must be sorted
}
}
}
score <- sum(minv) / (inn * data_size)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment