Skip to content

Instantly share code, notes, and snippets.

@KalelR
Last active April 20, 2022 13:40
Show Gist options
  • Save KalelR/fadc70d4523c58c58eb3eaf12562d475 to your computer and use it in GitHub Desktop.
Save KalelR/fadc70d4523c58c58eb3eaf12562d475 to your computer and use it in GitHub Desktop.
Test for optimal radius in dbscan
using ChaosTools, DynamicalSystemsBase, ChaosTools.DelayEmbeddings,ChaosTools.Entropies
using OrdinaryDiffEq
using Test, Random
using Neighborhood
using Distances, Clustering, Distribution
using Statistics, PyPlot
function basins_fractions_retfeatures(mapper, ics; isaverage=true, isnormalized=false)
feature_array = ChaosTools.extract_features(mapper, ics; show_progress=true)
if isnormalized feature_array = mapslices(nmlz, feature_array, dims=2) end
class_labels, _, ϵ, s_grid, ϵ_grid = ChaosTools.classify_features_clustering(feature_array, mapper.min_neighbors, mapper.clust_method_norm, isaverage)
fs = basins_fractions(class_labels)
return fs, class_labels, feature_array, ϵ, s_grid, ϵ_grid
end
function ChaosTools.optimal_radius_dbscan(features, min_neighbors, metric; isaverage=true)
feat_ranges = maximum(features, dims=2)[:,1] .- minimum(features, dims=2)[:,1];
ϵ_grid = range(minimum(feat_ranges)/200, minimum(feat_ranges), length=200)
s_grid = zeros(size(ϵ_grid)) #average silhouette values (which we want to maximize)
#vary ϵ to find the best one (which will maximize the minimum sillhoute)
for i=1:length(ϵ_grid)
clusters = ChaosTools.dbscan(features, ϵ_grid[i]; min_neighbors)
dists = ChaosTools.pairwise(metric, features)
class_labels = ChaosTools.cluster_props(clusters, features)
if length(clusters) ≠ 1 #silhouette undefined if only one cluster. ***
sils = ChaosTools.silhouettes(class_labels, dists)
if(isaverage)
s_grid[i] = mean(sils)
else
s_grid[i] = minimum(sils)
end
else
s_grid[i] = 0; #considers single-cluster solution on the midpoint (following Wikipedia)
end
end
max, idx = findmax(s_grid)
ϵ_optimal = ϵ_grid[idx]
return ϵ_optimal, s_grid, ϵ_grid
end
function ChaosTools.classify_features_clustering(features, min_neighbors, metric, isaverage=true)
ϵ_optimal, s_grid, ϵ_grid = ChaosTools.optimal_radius_dbscan(features, min_neighbors, metric, isaverage=isaverage)
# Now recalculate the final clustering with the optimal ϵ
clusters = ChaosTools.dbscan(features, ϵ_optimal; min_neighbors)
clusters, sizes = ChaosTools.sort_clusters_calc_size(clusters)
class_labels = ChaosTools.cluster_props(clusters, features; include_boundary=false)
# number of real clusters (size above minimum points);
# this is also the number of "templates"
k = length(sizes[sizes .> min_neighbors])
# create templates/labels, assign errors
class_errors = zeros(size(features)[2])
for i=1:k
idxs_cluster = class_labels .== i
center = mean(features[:, class_labels .== i], dims=2)[:,1]
dists = colwise(Euclidean(), center, features[:, idxs_cluster])
class_errors[idxs_cluster] = dists
end
return class_labels, class_errors, ϵ_optimal, s_grid, ϵ_grid
end
function nmlz(vec::Vector{T}) where T
vec .-= minimum(vec)
max = maximum(vec)
if max == 0 return zeros(T, length(vec)) end
vec ./= maximum(vec)
end
function tester(numics, mapper, grid; isnormalized=true, isaverage=true, system="lorenz84", seed=1234, xlims=nothing)
sampler, = statespace_sampler(Random.MersenneTwister(seed); min_bounds = minimum.(grid), max_bounds = maximum.(grid))
ics = Dataset([sampler() for i in 1:numics]);
fs, cls, features, ϵ, s_grid, ϵ_grid = basins_fractions_retfeatures(mapper, ics; isaverage, isnormalized)
fig=figure(); PyPlot.scatter(features[1,:], features[2,:], c=cls); colorbar()
if xlim !=(isnothing(xlim)) xlim(xlims) end
stitle = "$(system)-normalized_$(isnormalized)-numics_$(numics)-average_$(isaverage)\n fs = $(fs)\n optimal_radius = $(ϵ), seed = $(seed)"; title(stitle);
sfile = "$(system)-normalized_$(isnormalized)-numics_$(numics)-average_$(isaverage)-seed_$(seed)"; savefig("figs/$(sfile).png")
return fs, cls, features, ϵ, s_grid, ϵ_grid
end
function featurizer(A, t)
g = exp(genentropy(A, 0.1; q = 0))
p = estimate_period(A[:,1], :zc)
p = isnan(p) ? 0 : p
return [g, p]
end
system = "lorenz84"
F = 6.886; G = 1.347; a = 0.255; b = 4.0
ds = Systems.lorenz84(; F, G, a, b)
u0s = [
1 => [2.0, 1, 0], # periodic
2 => [-2.0, 1, 0], # chaotic
3 => [0, 1.5, 1.0], # fixed point
]
M = 200; z = 3
xg = yg = zg = range(-z, z; length = M)
grid = (xg, yg, zg)
expected_fs_raw = Dict(2 => 0.165, 3 => 0.642, 1 => 0.193)
#original
diffeq = (alg = Vern9(), reltol = 1e-9, abstol = 1e-9)
mapper = ChaosTools.AttractorsViaFeaturizing(ds, featurizer; diffeq, Ttr = 500)
tester(1000, mapper, grid; isnormalized=false, isaverage=true, system="lorenz84", seed=1234, xlims=nothing)
#datseris in gist
diffeq = (alg = Tsit5(), dt = 0.005, adaptive = false)
mapper = ChaosTools.AttractorsViaFeaturizing(ds, featurizer; diffeq, Ttr = 100.0, Δt=1.0, T=100)
tester(100, mapper, grid; isnormalized=false, isaverage=true, system="lorenz84-diffintegration", seed=1234, xlims=nothing)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment