Last active
September 17, 2019 00:57
-
-
Save donpdonp/44eaa969736b22e9a6f0fc7e84fe63bf to your computer and use it in GitHub Desktop.
gluon kmeans cluster test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
return { | |
name: "kluster" | |
} | |
}) | |
var proxpairs = {} | |
function go(msg) { | |
if(msg.method == "irc.privmsg") { | |
var cmd = /^\!kluster(\s+(\d+)\s+(\d+))?$/ | |
var match = cmd.exec(msg.params.message) | |
if(match){ | |
var kcount = parseInt(match[2]) || 10 | |
var itercount = parseInt(match[3]) || 10 | |
bot.say(msg.params.channel, "kluster clusters "+kcount+" iters "+itercount) | |
var opts = {vectorFunction: kmeansVector, numberOfClusters: kcount, maxIterations: itercount} | |
proximity_update(opts, function(allclusters){ | |
var avgdistall = avgdistClusters(allclusters) | |
var clusters = allclusters.reduce(function(m,c){if(c.data.length>0) m.push(c); return m}, []) | |
var avgdist = avgdistClusters(clusters) | |
bot.say(msg.params.channel, allclusters.length+" clusters. "+clusters.length+ | |
" populated. cluster mean avg dist = "+(avgdist.avg/1024).toFixed(1)+"km"+ | |
" stddev "+(avgdist.stddev/1024).toFixed(1)+"km"+" allcluster mean avg dist = "+ | |
(avgdistall.avg/1024).toFixed(1)+"km"+" stddev "+(avgdistall.stddev/1024).toFixed(1)+"km") | |
var reports = clusters.map(function(c){ | |
return {members: c.data.map(function(f){return "_"+f.username}), | |
mean: c.mean.map(function(mc){return mc.toFixed(4)})} | |
}) | |
reports.forEach(function(r, idx){ | |
bot.say(msg.params.channel, "group "+(idx+1)+"/"+reports.length+" "+r.mean[0]+","+r.mean[1]+" "+ | |
r.members.join(',')) | |
}) | |
}) | |
} | |
var cmd = /^\!vincenty(\s+([-+]?[0-9]*\.?[0-9]+)\s+([-+]?[0-9]*\.?[0-9]+)\s+([-+]?[0-9]*\.?[0-9]+)\s+([-+]?[0-9]*\.?[0-9]+))?$/ | |
var match = cmd.exec(msg.params.message) | |
if(match){ | |
var lt1 = match[2] | |
var lg1 = match[3] | |
var lt2 = match[4] | |
var lg2 = match[5] | |
bot.say(msg.params.channel, "dist "+lt1+","+lg1+" "+lt2+","+lg2) | |
var v = distVincenty(lt1, lg1, lt2, lg2) | |
var h = pointDistance({latitude:lt1, longitude:lg1}, {latitude:lt2, longitude:lg2}) | |
bot.say(msg.params.channel, "vincenty "+v+" haversine "+h+" diff "+Math.abs(v-h)) | |
} | |
} | |
if(msg.method == "icecondor.location") { | |
var location = msg.params | |
// user_track_load(location.username, function(track){ | |
// if(track) { | |
// bot.say(bot.admin_channel, "icecondor.location: "+location.username) | |
// var opts = {vectorFunction: kmeansVector} | |
// proximity_update(opts, function(clusters){ | |
// var report = clusters.map(function(c){ | |
// return {members: c.data.map(function(f){return "_"+f.username}), mean: c.mean}}) | |
// bot.say(bot.admin_channel, "IC proxk: "+JSON.stringify(report)) | |
// }) | |
// } | |
// }) | |
} | |
} | |
function avgdistClusters(clusters) { | |
if(clusters.length > 1) { | |
distances = [] | |
var prevLL = clusters[0].mean | |
for(var i=1; i < clusters.length; i += 1) { | |
var ll = clusters[i].mean | |
distances.push(pointDistance({latitude:prevLL[0], longitude:prevLL[1]}, {latitude:ll[0], longitude:ll[1]})) | |
prevLL = ll | |
} | |
var avg = avgList(distances) | |
var sqdiffs = distances.map(function(d){return Math.pow(d-avg,2)}) | |
var stddev = Math.sqrt(avgList(sqdiffs)) | |
return {avg: avg, stddev: stddev } | |
} else { return {avg: 0, stddev: 0} } | |
} | |
function avgList(list) { | |
return list.reduce(function(m,d){return m+d}, 0)/list.length | |
} | |
function proximity_update(opts, cb) { | |
userlist_load(function(list){ | |
friend_tracks(list, function(friends){ | |
//pairprox(friends, location) | |
cb(getClusters(friends, opts)) | |
}) | |
}) | |
} | |
function kmeansVector(friend) { | |
return [friend.location.latitude, friend.location.longitude] | |
} | |
function pairprox(friends, location) { | |
var age_mins = (new Date() - new Date(location.date))/60/1000 | |
friends.forEach(function(friend){ | |
if(age_mins < 30 && friend.age_mins < 30) { | |
var key = [location.username, friend.username].sort().join(':') | |
if(proxpairs[key]) { | |
var prox2 = Math.log(friend.distance) / Math.log(2) | |
var lastprox = proxpairs[key] | |
var lastprox2 = Math.log(lastprox) / Math.log(2) | |
var delta2 = lastprox2 - prox2 | |
var proxdiff = Math.abs(lastprox - friend.distance) | |
var report = "_"+location.username +" moved "+ | |
(delta2 > 0 ? "further from" : "closer to") + | |
" _"+ friend.username + | |
" by "+proxdiff.toFixed(1)+"m. distance was "+(lastprox/1000).toFixed(3)+"km"+ | |
" now is "+(friend.distance/1000).toFixed(3)+"km (2^"+delta2.toFixed(8)+")" | |
//bot.say(bot.admin_channel, report) | |
if(Math.abs(delta2) > 1) { | |
//bot.say(alert_channel, report) | |
} | |
} else { | |
var pmsg = "first: _"+location.username+" "+age_mins.toFixed(1)+"min ago"+ | |
"-> _"+friend.username+" "+friend.age_mins.toFixed(1)+"min ago = "+ | |
friend.distance.toFixed(0)+"m apart" | |
bot.say(bot.admin_channel, pmsg) | |
} | |
proxpairs[key] = friend.distance | |
} | |
}) | |
} | |
function user_track_load(username, cb, missing_cb) { | |
db.get('icecondor:user'+':'+username+':track', function(trackJson){ | |
var track | |
if (trackJson) { | |
track = JSON.parse(trackJson) | |
} else { | |
} | |
cb(track) | |
}) | |
} | |
function userlist_load(cb) { | |
var key_userlist = 'icecondor:userlist' | |
db.get(key_userlist, function(listJson){ | |
if(listJson) { | |
list = JSON.parse(listJson) | |
} | |
cb(list) | |
}) | |
} | |
function friend_tracks(userlist, cb) { | |
promise_all(userlist.map(function(username){ | |
return function(done){ | |
return user_track_load(username, function(friend_track){ | |
var friend_age_mins = (new Date() - new Date(friend_track.location.date))/60/1000 | |
done({username: username, age_mins: friend_age_mins, location: friend_track.location}) | |
})} | |
}), function(successes, results) { | |
//bot.say(bot.admin_channel, "friend_distances successes "+JSON.stringify(successes)+" results "+JSON.stringify(results)) | |
var winners = results.reduce(function(m,e){if(e) m.push(e); return m}, []) | |
cb(winners) | |
}) | |
} | |
function promise_all(cbs, done_cb) { | |
var cbs_done = Array(cbs.length) | |
var cbs_results = Array(cbs.length) | |
cbs.forEach(function(cb, idx) { | |
if(typeof(cb) == "function") { | |
cbs_done[idx] = false | |
var done = function(result) { | |
cbs_done[idx] = true | |
cbs_results[idx] = result | |
if(cbs_done.every(function(x){return x})) { done_cb(cbs_done, cbs_results) } | |
} | |
cb(done) | |
} | |
}) | |
if(cbs_done.every(function(x){return x === null})) { | |
// no callbacks were passed | |
done_cb(cbs_done) | |
} | |
} | |
// K MEANS | |
// https://github.com/shudima/dimas-kmeans | |
function getClusters (data, options) { | |
var numberOfClusters = options.numberOfClusters || getNumberOfClusters(data.length) | |
var distanceFunction = options.distanceFunction || getDistance | |
var vectorFunction = options.vectorFunction || defaultVectorFunction | |
var maxIterations = options.maxIterations || 1000 | |
var numberOfDimensions = getNumberOfDimensions(data, vectorFunction) | |
var minMaxValues = getMinAndMaxValues(data, numberOfDimensions, vectorFunction) | |
bot.say(bot.admin_channel, "k minMax "+JSON.stringify(minMaxValues)) | |
// var means = createRandomMeans(numberOfDimensions, numberOfClusters, minMaxValues) | |
var means = createOrderedMeans(numberOfDimensions, numberOfClusters, minMaxValues) | |
var clusters = createClusters(means) | |
for(var count = 0; count < maxIterations; count++) { | |
initClustersData(clusters) | |
assignDataToClusters(data, clusters, distanceFunction, vectorFunction) | |
updateMeans(clusters, vectorFunction) | |
//var meansDistance = getMeansDistance(clusters, vectorFunction, distanceFunction) | |
} | |
return clusters | |
} | |
function defaultVectorFunction (vector) { | |
return vector | |
} | |
function getNumberOfDimensions (data, vectorFunction) { | |
if (data[0]) { | |
return vectorFunction(data[0]).length | |
} | |
return 0 | |
} | |
function getNumberOfClusters (n) { | |
return Math.ceil(Math.sqrt(n / 2)) | |
} | |
function getMinAndMaxValues (data, numberOfDimensions, vectorFunction) { | |
var minMaxValues = initMinAndMaxValues(numberOfDimensions) | |
data.forEach(function (vector) { | |
for (var i = 0; i < numberOfDimensions; i++) { | |
if (vectorFunction(vector)[i] < minMaxValues.minValue[i]) { | |
minMaxValues.minValue[i] = vectorFunction(vector)[i] | |
} | |
if (vectorFunction(vector)[i] > minMaxValues.maxValue[i]) { | |
minMaxValues.maxValue[i] = vectorFunction(vector)[i] | |
} | |
}; | |
}) | |
return minMaxValues | |
} | |
function initMinAndMaxValues (numberOfDimensions) { | |
var result = { minValue: [], maxValue: [] } | |
for (var i = 0; i < numberOfDimensions; i++) { | |
result.minValue.push(9999) | |
result.maxValue.push(-9999) | |
} | |
return result | |
} | |
function getMeansDistance (clusters, vectorFunction, distanceFunction) { | |
var meansDistance = 0 | |
clusters.forEach(function (cluster) { | |
cluster.data.forEach(function (vector) { | |
meansDistance = meansDistance + Math.pow(distanceFunction(cluster.mean, vectorFunction(vector)), 2) | |
}) | |
}) | |
return meansDistance | |
} | |
function updateMeans (clusters, vectorFunction) { | |
clusters.forEach(function (cluster) { | |
updateMean(cluster, vectorFunction) | |
}) | |
} | |
function updateMean (cluster, vectorFunction) { | |
var newMean = [] | |
for (var i = 0; i < cluster.mean.length; i++) { | |
newMean.push(getMean(cluster.data, i, vectorFunction)) | |
} | |
cluster.mean = newMean | |
} | |
function getMean (data, index, vectorFunction) { | |
var sum = 0 | |
var total = data.length | |
if (total == 0) return 0 | |
data.forEach(function (vector) { | |
sum = sum + vectorFunction(vector)[index] | |
}) | |
return sum / total | |
} | |
function assignDataToClusters (data, clusters, distanceFunction, vectorFunction) { | |
data.forEach(function (vector) { | |
var cluster = findClosestCluster(vectorFunction(vector), clusters, distanceFunction) | |
cluster.data.push(vector) | |
}) | |
} | |
function findClosestCluster (vector, clusters, distanceFunction) { | |
var closest = {} | |
var minDistance = 9999999 | |
clusters.forEach(function (cluster) { | |
var distance = distanceFunction(cluster.mean, vector) | |
if (distance < minDistance) { | |
minDistance = distance | |
closest = cluster | |
}; | |
}) | |
bot.say(bot.admin_channel, closest.username+" closest to "+ JSON.stringify(closest.mean) +" with "+(minDistance/1024)+"km") | |
return closest | |
} | |
function initClustersData (clusters) { | |
clusters.forEach(function (cluster) { | |
cluster.data = [] | |
}) | |
} | |
function createClusters (means) { | |
return means.map(function (mean) { | |
return { mean: mean, data: [] } | |
}) | |
} | |
function createRandomMeans (numberOfDimensions, numberOfClusters, minMaxValues) { | |
var means = [] | |
for (var i = 0; i < numberOfClusters; i++) { | |
means.push(createRandomPoint(minMaxValues)) | |
} | |
return means | |
} | |
function createOrderedMeans (numberOfDimensions, numberOfClusters, minMaxValues) { | |
var means = [] | |
// only works for 2dim data | |
var colSize = 2 | |
var rowSize = numberOfClusters / colSize | |
for (var col = 0; col < colSize; col += 1) { | |
for (var row = 0; row < rowSize; row += 1) { | |
means.push(createOrderedXYPoint(col, row, colSize, rowSize, minMaxValues)) | |
} | |
} | |
return means | |
} | |
function createRandomPoint(minMaxValues) { | |
var point = [] | |
for (var i = 0, len = minMaxValues.minValue.length; i < len; i++) { | |
point.push(random(minMaxValues.minValue[i], minMaxValues.maxValue[i])) | |
} | |
return point | |
} | |
function createOrderedXYPoint(col, row, colSize, rowSize, minMaxValues) { | |
var offset = 0.5 | |
var colDistance = minMaxValues.maxValue[0] - minMaxValues.minValue[0] | |
var colUnit = colDistance / colSize | |
var rowDistance = minMaxValues.maxValue[1] - minMaxValues.minValue[1] | |
var rowUnit = rowDistance / rowSize | |
var point = [colUnit * (col+offset), rowUnit * (row+offset)] | |
//bot.say(bot.admin_channel, "orderedXY "+JSON.stringify(point)+" colSize="+colSize+" rowSize="+rowSize+" colUnit="+colUnit+" rowUnit="+rowUnit) | |
return point | |
} | |
function random (low, high) { | |
return Math.random() * (high - low) + low | |
} | |
function getDistance (vector1, vector2) { | |
var sum = 0 | |
for (var i = 0; i < vector1.length; i++) { | |
sum = sum + Math.pow(vector1[i] - vector2[i], 2) | |
} | |
return Math.sqrt(sum) | |
} | |
// from http://www.movable-type.co.uk/scripts/latlong.html | |
// haversine formula computes great-circle distance | |
function pointDistance(la, lb) { | |
var lat1 = la.latitude, lon1 = la.longitude | |
var lat2 = lb.latitude, lon2 = lb.longitude | |
var dLat = numberToRadius(lat2 - lat1), | |
dLon = numberToRadius(lon2 - lon1), | |
a = Math.pow(Math.sin(dLat / 2), 2) + Math.cos(numberToRadius(lat1)) | |
* Math.cos(numberToRadius(lat2)) * Math.pow(Math.sin(dLon / 2), 2), | |
c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); | |
return (6371 * c) * 1000; // returns meters | |
} | |
function numberToRadius(number) { | |
return number * Math.PI / 180; | |
} | |
function numberToDegree(number) { | |
return number * 180 / Math.PI | |
} | |
function segmentBearing(loc1, loc2) { | |
var λ1 = numberToRadius(loc1.longitude) | |
var λ2 = numberToRadius(loc2.longitude) | |
var φ1 = numberToRadius(loc1.latitude) | |
var φ2 = numberToRadius(loc2.latitude) | |
var y = Math.sin(λ2-λ1) * Math.cos(φ2); | |
var x = Math.cos(φ1)*Math.sin(φ2) - | |
Math.sin(φ1)*Math.cos(φ2)*Math.cos(λ2-λ1); | |
var brng = Math.atan2(y, x)// .toDegrees() | |
var bearingDegrees = numberToDegree(brng) | |
var offset = (bearingDegrees < 0) ? 360 : 0 | |
return offset + bearingDegrees | |
} | |
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
/* Vincenty Direct Solution of Geodesics on the Ellipsoid (c) Chris Veness 2005-2012 */ | |
/* http://creativecommons.org/licenses/by/3.0/ */ | |
/* from: Vincenty direct formula - T Vincenty, "Direct and Inverse Solutions of Geodesics on the */ | |
/* Ellipsoid with application of nested equations", Survey Review, vol XXII no 176, 1975 */ | |
/* http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf */ | |
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
/** | |
* Calculates destination point given start point lat/long, bearing & distance, | |
* using Vincenty inverse formula for ellipsoids | |
* | |
* @param {Number} lat1, lon1: first point in decimal degrees | |
* @param {Number} brng: initial bearing in decimal degrees | |
* @param {Number} dist: distance along bearing in metres | |
* @returns (LatLon} destination point | |
*/ | |
function destVincenty(lat1, lon1, brng, dist) { | |
var a = 6378137, | |
b = 6356752.3142, | |
f = 1 / 298.257223563; // WGS-84 ellipsiod | |
var s = dist; | |
var alpha1 = numberToRadius(brng); | |
var sinAlpha1 = Math.sin(alpha1); | |
var cosAlpha1 = Math.cos(alpha1); | |
var tanU1 = (1 - f) * Math.tan(numberToRadius(lat1)); | |
var cosU1 = 1 / Math.sqrt((1 + tanU1 * tanU1)), | |
sinU1 = tanU1 * cosU1; | |
var sigma1 = Math.atan2(tanU1, cosAlpha1); | |
var sinAlpha = cosU1 * sinAlpha1; | |
var cosSqAlpha = 1 - sinAlpha * sinAlpha; | |
var uSq = cosSqAlpha * (a * a - b * b) / (b * b); | |
var A = 1 + uSq / 16384 * (4096 + uSq * (-768 + uSq * (320 - 175 * uSq))); | |
var B = uSq / 1024 * (256 + uSq * (-128 + uSq * (74 - 47 * uSq))); | |
var sigma = s / (b * A), | |
sigmaP = 2 * Math.PI; | |
while (Math.abs(sigma - sigmaP) > 1e-12) { | |
var cos2SigmaM = Math.cos(2 * sigma1 + sigma); | |
var sinSigma = Math.sin(sigma); | |
var cosSigma = Math.cos(sigma); | |
var deltaSigma = B * sinSigma * (cos2SigmaM + B / 4 * | |
(cosSigma * (-1 + 2 * cos2SigmaM * cos2SigmaM) - B / 6 * | |
cos2SigmaM * (-3 + 4 * sinSigma * sinSigma) * | |
(-3 + 4 * cos2SigmaM * cos2SigmaM))); | |
sigmaP = sigma; | |
sigma = s / (b * A) + deltaSigma; | |
} | |
var tmp = sinU1 * sinSigma - cosU1 * cosSigma * cosAlpha1; | |
var lat2 = Math.atan2(sinU1 * cosSigma + cosU1 * sinSigma * cosAlpha1, (1 - f) * | |
Math.sqrt(sinAlpha * sinAlpha + tmp * tmp)); | |
var lambda = Math.atan2(sinSigma * sinAlpha1, cosU1 * cosSigma - sinU1 * sinSigma * cosAlpha1); | |
var C = f / 16 * cosSqAlpha * (4 + f * (4 - 3 * cosSqAlpha)); | |
var L = lambda - (1 - C) * f * sinAlpha * (sigma + C * sinSigma * (cos2SigmaM + C * cosSigma * | |
(-1 + 2 * cos2SigmaM * cos2SigmaM))); | |
var lon2 = (numberToRadius(lon1) + L + 3 * Math.PI) % (2 * Math.PI) - Math.PI; // normalise to -180...+180 | |
var revAz = Math.atan2(sinAlpha, -tmp); // final bearing, if required | |
return { | |
lat: numberToDegree(lat2), | |
lon: numberToDegree(lon2), | |
finalBearing: numberToDegree(revAz) | |
}; | |
} | |
/*! | |
* JavaScript function to calculate the geodetic distance between two points specified by latitude/longitude using the Vincenty inverse formula for ellipsoids. | |
* | |
* Taken from http://movable-type.co.uk/scripts/latlong-vincenty.html and optimized / cleaned up by Mathias Bynens <http://mathiasbynens.be/> | |
* Based on the Vincenty direct formula by T. Vincenty, “Direct and Inverse Solutions of Geodesics on the Ellipsoid with application of nested equations”, Survey Review, vol XXII no 176, 1975 <http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf> | |
* | |
* @param {Number} lat1, lon1: first point in decimal degrees | |
* @param {Number} lat2, lon2: second point in decimal degrees | |
* @returns {Number} distance in metres between points | |
*/ | |
function distVincenty(lat1, lon1, lat2, lon2) { | |
var a = 6378137, | |
b = 6356752.3142, | |
f = 1 / 298.257223563, // WGS-84 ellipsoid params | |
L = numberToRadius(lon2-lon1), | |
x = Math.atan(1 - f), | |
U1 = x * Math.tan(numberToRadius(lat1)), | |
U2 = x * Math.tan(numberToRadius(lat2)), | |
sinU1 = Math.sin(U1), | |
cosU1 = Math.cos(U1), | |
sinU2 = Math.sin(U2), | |
cosU2 = Math.cos(U2), | |
lambda = L, | |
lambdaP, | |
iterLimit = 100; | |
do { | |
var sinLambda = Math.sin(lambda), | |
cosLambda = Math.cos(lambda), | |
sinSigma = Math.sqrt((cosU2 * sinLambda) * (cosU2 * sinLambda) + (cosU1 * sinU2 - sinU1 * cosU2 * cosLambda) * | |
(cosU1 * sinU2 - sinU1 * cosU2 * cosLambda)) | |
if (0 === sinSigma) { | |
return 0; // co-incident points | |
} | |
var cosSigma = sinU1 * sinU2 + cosU1 * cosU2 * cosLambda, | |
sigma = Math.atan2(sinSigma, cosSigma), | |
sinAlpha = cosU1 * cosU2 * sinLambda / sinSigma, | |
cosSqAlpha = 1 - sinAlpha * sinAlpha, | |
cos2SigmaM = cosSigma - 2 * sinU1 * sinU2 / cosSqAlpha, | |
C = f / 16 * cosSqAlpha * (4 + f * (4 - 3 * cosSqAlpha)); | |
if (isNaN(cos2SigmaM)) { | |
cos2SigmaM = 0; // equatorial line: cosSqAlpha = 0 (§6) | |
}; | |
lambdaP = lambda; | |
lambda = L + (1 - C) * f * sinAlpha * (sigma + C * sinSigma * | |
(cos2SigmaM + C * cosSigma * (-1 + 2 * cos2SigmaM * cos2SigmaM))); | |
} while (Math.abs(lambda - lambdaP) > 1e-12 && --iterLimit > 0); | |
if (0 === iterLimit) { | |
return NaN; // formula failed to converge | |
} | |
var uSq = cosSqAlpha * (a * a - b * b) / (b * b), | |
A = 1 + uSq / 16384 * (4096 + uSq * (-768 + uSq * (320 - 175 * uSq))), | |
B = uSq / 1024 * (256 + uSq * (-128 + uSq * (74 - 47 * uSq))), | |
deltaSigma = B * sinSigma * (cos2SigmaM + B / 4 * (cosSigma * (-1 + 2 * cos2SigmaM * cos2SigmaM) - B / 6 * | |
cos2SigmaM * (-3 + 4 * sinSigma * sinSigma) * (-3 + 4 * | |
cos2SigmaM * cos2SigmaM))), | |
s = b * A * (sigma - deltaSigma); | |
return s.toFixed(3); // round to 1mm precision | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment