Skip to content

Instantly share code, notes, and snippets.

@kingcu
Created July 18, 2011 05:03
Show Gist options
  • Save kingcu/1088586 to your computer and use it in GitHub Desktop.
Save kingcu/1088586 to your computer and use it in GitHub Desktop.
simple kmeans clusterer for geo objects
class KMeans
attr_reader :clusters
def initialize(max=nil)
@max_iterations = max
end
def build(points, number_of_clusters)
@points = []
points.each do |pt|
next unless pt.lng && pt.lat
@points << ClusterPoint.new(pt.lng, pt.lat, pt)
end
@number_of_clusters = number_of_clusters
@iterations = 0
@converged = false
calculate_initial_clusters
while !(@converged || (@max_iterations && (@max_iterations <= @iterations)))
calculate_cluster_memberships
recompute_centers
@iterations += 1
end
return @clusters
end
protected
def calculate_initial_clusters
@clusters = []
tried_indexes = []
while @clusters.length < @number_of_clusters &&
tried_indexes.length < @points.length
random_index = rand(@points.length)
if !tried_indexes.include?(random_index)
tried_indexes << random_index
#TODO: inefficient
if @clusters.select { |c| c.center == @points[random_index]}.length == 0
@clusters << Cluster.new(@points[random_index], @clusters.length)
end
end
end
@number_of_clusters = @clusters.length
end
def calculate_cluster_memberships
@clusters.each { |c| c.points = [] }
@converged = true
@points.each do |point|
index = calculate_index(point)
@converged = false if index != point.cluster_label
point.cluster_label = index
@clusters[index].points << point
end
end
def recompute_centers
@clusters.each { |cluster| cluster.center = calculate_center(cluster) }
end
def calculate_center(cluster)
points = cluster.points
if points.length == 0
return cluster.center
end
lats, lngs = 0.0, 0.0
points.each do |pt|
lats += pt.lat
lngs += pt.lng
end
return ClusterPoint.new(lngs/points.length, lats/points.length)
end
# Classifies the given data item, returning the cluster index it belongs
def calculate_index(point)
get_min_index(@clusters.collect { |c| distance(point, c.center) })
end
def get_min_index(vals)
min = 999999999
min_index = 0
vals.each_with_index do |val, i|
if val < min
min = val
min_index = i
end
end
return min_index
end
def distance(a, b)
return euclidean_distance(a, b)
#return haversine_distance(a, b)
end
def euclidean_distance(a, b)
(a.lng - b.lng)**2 + (a.lat - b.lat)**2
end
# Calculate the distance in meters between two points of longitude and latitude using the Haversine method.
# This method assumes the earth is a perfect sphere in its calculations.
def haversine_distance(p1, p2)
# Convert to radians
#lat1 = (p1['y'] || p1['lat']) * Math::PI / 180
#lat2 = (p2['y'] || p2['lat']) * Math::PI / 180
#lon1 = (p1['x'] || p1['lng']) * Math::PI / 180
#lon2 = (p2['x'] || p2['lng']) * Math::PI / 180
lat1 = p1.lat * Math::PI / 180
lat2 = p2.lat * Math::PI / 180
lon1 = p1.lng * Math::PI / 180
lon2 = p2.lng * Math::PI / 180
lat_diff = (lat1 - lat2)
lon_diff = (lon1 - lon2)
radius = 6378100 # Mean radius of Earth in meters
# Calculate Haversine
a = Math.sin(lat_diff/2) * Math.sin(lat_diff/2) +
Math.cos(lat1) * Math.cos(lat2) * Math.sin(lon_diff/2) * Math.sin(lon_diff/2)
c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a))
return radius * c
end
end
class Cluster
attr_accessor :points, :center, :label
def initialize(center=nil, label=nil)
@center = center
@label = label
@points = []
end
end
class ClusterPoint
attr_accessor :lat, :lng, :cluster_label, :point
def initialize(x, y, point=nil)
@lng, @lat = x, y
@cluster_label = nil
@point = point
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment