-
-
Save KalelR/fadc70d4523c58c58eb3eaf12562d475 to your computer and use it in GitHub Desktop.
Test for optimal radius in dbscan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using ChaosTools, DynamicalSystemsBase, ChaosTools.DelayEmbeddings,ChaosTools.Entropies | |
using OrdinaryDiffEq | |
using Test, Random | |
using Neighborhood | |
using Distances, Clustering, Distribution | |
using Statistics, PyPlot | |
function basins_fractions_retfeatures(mapper, ics; isaverage=true, isnormalized=false) | |
feature_array = ChaosTools.extract_features(mapper, ics; show_progress=true) | |
if isnormalized feature_array = mapslices(nmlz, feature_array, dims=2) end | |
class_labels, _, ϵ, s_grid, ϵ_grid = ChaosTools.classify_features_clustering(feature_array, mapper.min_neighbors, mapper.clust_method_norm, isaverage) | |
fs = basins_fractions(class_labels) | |
return fs, class_labels, feature_array, ϵ, s_grid, ϵ_grid | |
end | |
function ChaosTools.optimal_radius_dbscan(features, min_neighbors, metric; isaverage=true) | |
feat_ranges = maximum(features, dims=2)[:,1] .- minimum(features, dims=2)[:,1]; | |
ϵ_grid = range(minimum(feat_ranges)/200, minimum(feat_ranges), length=200) | |
s_grid = zeros(size(ϵ_grid)) #average silhouette values (which we want to maximize) | |
#vary ϵ to find the best one (which will maximize the minimum sillhoute) | |
for i=1:length(ϵ_grid) | |
clusters = ChaosTools.dbscan(features, ϵ_grid[i]; min_neighbors) | |
dists = ChaosTools.pairwise(metric, features) | |
class_labels = ChaosTools.cluster_props(clusters, features) | |
if length(clusters) ≠ 1 #silhouette undefined if only one cluster. *** | |
sils = ChaosTools.silhouettes(class_labels, dists) | |
if(isaverage) | |
s_grid[i] = mean(sils) | |
else | |
s_grid[i] = minimum(sils) | |
end | |
else | |
s_grid[i] = 0; #considers single-cluster solution on the midpoint (following Wikipedia) | |
end | |
end | |
max, idx = findmax(s_grid) | |
ϵ_optimal = ϵ_grid[idx] | |
return ϵ_optimal, s_grid, ϵ_grid | |
end | |
function ChaosTools.classify_features_clustering(features, min_neighbors, metric, isaverage=true) | |
ϵ_optimal, s_grid, ϵ_grid = ChaosTools.optimal_radius_dbscan(features, min_neighbors, metric, isaverage=isaverage) | |
# Now recalculate the final clustering with the optimal ϵ | |
clusters = ChaosTools.dbscan(features, ϵ_optimal; min_neighbors) | |
clusters, sizes = ChaosTools.sort_clusters_calc_size(clusters) | |
class_labels = ChaosTools.cluster_props(clusters, features; include_boundary=false) | |
# number of real clusters (size above minimum points); | |
# this is also the number of "templates" | |
k = length(sizes[sizes .> min_neighbors]) | |
# create templates/labels, assign errors | |
class_errors = zeros(size(features)[2]) | |
for i=1:k | |
idxs_cluster = class_labels .== i | |
center = mean(features[:, class_labels .== i], dims=2)[:,1] | |
dists = colwise(Euclidean(), center, features[:, idxs_cluster]) | |
class_errors[idxs_cluster] = dists | |
end | |
return class_labels, class_errors, ϵ_optimal, s_grid, ϵ_grid | |
end | |
function nmlz(vec::Vector{T}) where T | |
vec .-= minimum(vec) | |
max = maximum(vec) | |
if max == 0 return zeros(T, length(vec)) end | |
vec ./= maximum(vec) | |
end | |
function tester(numics, mapper, grid; isnormalized=true, isaverage=true, system="lorenz84", seed=1234, xlims=nothing) | |
sampler, = statespace_sampler(Random.MersenneTwister(seed); min_bounds = minimum.(grid), max_bounds = maximum.(grid)) | |
ics = Dataset([sampler() for i in 1:numics]); | |
fs, cls, features, ϵ, s_grid, ϵ_grid = basins_fractions_retfeatures(mapper, ics; isaverage, isnormalized) | |
fig=figure(); PyPlot.scatter(features[1,:], features[2,:], c=cls); colorbar() | |
if xlim !=(isnothing(xlim)) xlim(xlims) end | |
stitle = "$(system)-normalized_$(isnormalized)-numics_$(numics)-average_$(isaverage)\n fs = $(fs)\n optimal_radius = $(ϵ), seed = $(seed)"; title(stitle); | |
sfile = "$(system)-normalized_$(isnormalized)-numics_$(numics)-average_$(isaverage)-seed_$(seed)"; savefig("figs/$(sfile).png") | |
return fs, cls, features, ϵ, s_grid, ϵ_grid | |
end | |
function featurizer(A, t) | |
g = exp(genentropy(A, 0.1; q = 0)) | |
p = estimate_period(A[:,1], :zc) | |
p = isnan(p) ? 0 : p | |
return [g, p] | |
end | |
system = "lorenz84" | |
F = 6.886; G = 1.347; a = 0.255; b = 4.0 | |
ds = Systems.lorenz84(; F, G, a, b) | |
u0s = [ | |
1 => [2.0, 1, 0], # periodic | |
2 => [-2.0, 1, 0], # chaotic | |
3 => [0, 1.5, 1.0], # fixed point | |
] | |
M = 200; z = 3 | |
xg = yg = zg = range(-z, z; length = M) | |
grid = (xg, yg, zg) | |
expected_fs_raw = Dict(2 => 0.165, 3 => 0.642, 1 => 0.193) | |
#original | |
diffeq = (alg = Vern9(), reltol = 1e-9, abstol = 1e-9) | |
mapper = ChaosTools.AttractorsViaFeaturizing(ds, featurizer; diffeq, Ttr = 500) | |
tester(1000, mapper, grid; isnormalized=false, isaverage=true, system="lorenz84", seed=1234, xlims=nothing) | |
#datseris in gist | |
diffeq = (alg = Tsit5(), dt = 0.005, adaptive = false) | |
mapper = ChaosTools.AttractorsViaFeaturizing(ds, featurizer; diffeq, Ttr = 100.0, Δt=1.0, T=100) | |
tester(100, mapper, grid; isnormalized=false, isaverage=true, system="lorenz84-diffintegration", seed=1234, xlims=nothing) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment