Skip to content

Instantly share code, notes, and snippets.

@thistleknot
Last active January 2, 2022 17:08
Show Gist options
  • Save thistleknot/b9f04cddc420cb253bc59cff02766a3f to your computer and use it in GitHub Desktop.
Save thistleknot/b9f04cddc420cb253bc59cff02766a3f to your computer and use it in GitHub Desktop.
findKnee.R
#' Score each point of a curve by its distance from the end-to-end chord
#'
#' Draws the straight line that joins the first and last values of `xdata`
#' and returns, for every position, the absolute vertical gap between the
#' curve and that line. The position with the largest gap is the knee
#' (elbow) of the curve, so `which.max(findknee(x))` gives its index.
#'
#' The previous implementation wrote to `xdata$delta` on an atomic vector,
#' which silently coerced it to a list, raised "Coercing LHS to a list"
#' warnings and padded the result with two trailing `NA`s. This version
#' stays on plain numeric vectors and returns exactly one value per input
#' point; callers that still filter out `NA`s keep working unchanged.
#'
#' @param xdata A numeric vector of curve values, ordered along the x axis
#'   with unit spacing between consecutive points.
#' @return A numeric vector the same length as `xdata` holding the absolute
#'   deviation from the chord. The first and last entries are zero up to
#'   floating-point error. Inputs shorter than two points yield zeros.
findknee <- function(xdata) {
  stopifnot(is.numeric(xdata))
  n <- length(xdata)
  # A chord needs two distinct end points; with fewer there is no deviation.
  if (n < 2L) {
    return(numeric(n))
  }
  # Average drop per step when walking from the first to the last point.
  rate_of_change <- (xdata[1] - xdata[n]) / (n - 1)
  # Height of the chord at every position 1..n.
  chord <- xdata[1] - (seq_len(n) - 1) * rate_of_change
  abs(xdata - chord)
}
# library("factoextra")
# findknee() scores every point of a curve by its absolute distance from the
# straight line joining the curve's first and last points; the position with
# the largest score is the knee (elbow) of the curve.

# Load the data set. The stray `text = readings` argument was removed: it
# referred to an undefined variable and was ignored because `file` is given.
states_csv <- "C:\\Users\\User\\Documents\\wiki\\wiki\\dev\\python\\python-ml\\data\\raw\\states.csv"
df <- read.csv(file = states_csv, header = TRUE, sep = ",", dec = ".")

# Drop the first column and standardise the remaining ones before clustering.
data <- df[, 2:ncol(df)]
data_scaled <- scale(data)
# f_data <- fviz_nbclust(data_scaled, kmeans, method = "wss", k.max = 24) + theme_minimal() + ggtitle("the Elbow Method")

set.seed(31)

# Largest number of clusters to evaluate.
k_max <- 15L

# Preallocate instead of growing the vectors inside the loop.
metric1 <- numeric(k_max) # between-cluster sum of squares for each k
metric2 <- numeric(k_max) # mean within-cluster sum of squares for each k

for (k in seq_len(k_max)) {
  km <- kmeans(data_scaled, centers = k, nstart = 100)
  metric1[k] <- km$betweenss
  metric2[k] <- sum(km$withinss) / length(km$withinss)
}

plot(metric1)
plot(metric2)

# Knee profile of each metric: drop any NA entries, then rescale so the
# strongest knee has value 1 and the two profiles are comparable.
knee1 <- findknee(metric1)
knee1 <- knee1[!is.na(knee1)]
knee1 <- knee1 / max(knee1)

knee2 <- findknee(metric2)
knee2 <- knee2[!is.na(knee2)]
knee2 <- knee2 / max(knee2)

print(which.max(knee1))
print(which.max(knee2))

# Overlay both normalised knee profiles on one plot.
plot(knee1)
lines(knee1)
lines(knee2)

# Pick the k where the two profiles agree most closely (ratio nearest 1).
# Positions where both profiles are exactly 0 give NaN, which which.min()
# skips.
optimal_k <- which.min(abs(knee1 / knee2 - 1))
@thistleknot
Copy link
Author

thistleknot commented Nov 21, 2021

image

image

Idea behind it
image

@thistleknot
Copy link
Author

thistleknot commented Nov 24, 2021

image

@thistleknot
Copy link
Author

thistleknot commented Nov 24, 2021

which(knee1/knee2==1)
[1] 4
print(which.max(knee1))
[1] 4
print(which.max(knee2))
[1] 4

@thistleknot
Copy link
Author

def findknee(xdata):
    """Score each point of a curve by its distance from the end-to-end chord.

    Draws the straight line joining the first and last values of ``xdata``
    and returns, for every position, the absolute vertical gap between the
    curve and that line, rounded to the nearest integer. The index of the
    largest entry is the knee (elbow) of the curve.

    Args:
        xdata: Numeric sequence (list, tuple or NumPy array) of curve values
            ordered along the x axis with unit spacing. Plain Python
            sequences are now accepted; previously only NumPy arrays worked
            because the code subtracted a scalar from ``xdata`` directly.

    Returns:
        numpy.ndarray of floats, same length as ``xdata``, holding the
        rounded absolute deviation from the chord. Inputs with fewer than
        two points yield an array of zeros.
    """
    values = np.asarray(xdata, dtype=float)
    n = len(values)
    # A chord needs two distinct end points; with fewer there is no deviation.
    if n < 2:
        return np.zeros(n)
    # Average drop per step when walking from the first to the last point.
    rate_of_change = (values[0] - values[-1]) / (n - 1)
    # Height of the chord at every index 0..n-1.
    chord = values[0] - rate_of_change * np.arange(n)
    # Rounding is kept from the original implementation.
    return np.round(np.abs(values - chord))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment