Created
July 18, 2011 05:03
-
-
Save kingcu/1088586 to your computer and use it in GitHub Desktop.
simple kmeans clusterer for geo objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class KMeans | |
attr_reader :clusters | |
def initialize(max=nil) | |
@max_iterations = max | |
end | |
def build(points, number_of_clusters) | |
@points = [] | |
points.each do |pt| | |
next unless pt.lng && pt.lat | |
@points << ClusterPoint.new(pt.lng, pt.lat, pt) | |
end | |
@number_of_clusters = number_of_clusters | |
@iterations = 0 | |
@converged = false | |
calculate_initial_clusters | |
while !(@converged || (@max_iterations && (@max_iterations <= @iterations))) | |
calculate_cluster_memberships | |
recompute_centers | |
@iterations += 1 | |
end | |
return @clusters | |
end | |
protected | |
def calculate_initial_clusters | |
@clusters = [] | |
tried_indexes = [] | |
while @clusters.length < @number_of_clusters && | |
tried_indexes.length < @points.length | |
random_index = rand(@points.length) | |
if !tried_indexes.include?(random_index) | |
tried_indexes << random_index | |
#TODO: inefficient | |
if @clusters.select { |c| c.center == @points[random_index]}.length == 0 | |
@clusters << Cluster.new(@points[random_index], @clusters.length) | |
end | |
end | |
end | |
@number_of_clusters = @clusters.length | |
end | |
def calculate_cluster_memberships | |
@clusters.each { |c| c.points = [] } | |
@converged = true | |
@points.each do |point| | |
index = calculate_index(point) | |
@converged = false if index != point.cluster_label | |
point.cluster_label = index | |
@clusters[index].points << point | |
end | |
end | |
def recompute_centers | |
@clusters.each { |cluster| cluster.center = calculate_center(cluster) } | |
end | |
def calculate_center(cluster) | |
points = cluster.points | |
if points.length == 0 | |
return cluster.center | |
end | |
lats, lngs = 0.0, 0.0 | |
points.each do |pt| | |
lats += pt.lat | |
lngs += pt.lng | |
end | |
return ClusterPoint.new(lngs/points.length, lats/points.length) | |
end | |
# Classifies the given data item, returning the cluster index it belongs | |
def calculate_index(point) | |
get_min_index(@clusters.collect { |c| distance(point, c.center) }) | |
end | |
def get_min_index(vals) | |
min = 999999999 | |
min_index = 0 | |
vals.each_with_index do |val, i| | |
if val < min | |
min = val | |
min_index = i | |
end | |
end | |
return min_index | |
end | |
def distance(a, b) | |
return euclidean_distance(a, b) | |
#return haversine_distance(a, b) | |
end | |
def euclidean_distance(a, b) | |
(a.lng - b.lng)**2 + (a.lat - b.lat)**2 | |
end | |
# Calculate the distance in meters between two points of longitude and latitude using the Haversine method. | |
# This method assumes the earth is a perfect sphere in its calculations. | |
def haversine_distance(p1, p2) | |
# Convert to radians | |
#lat1 = (p1['y'] || p1['lat']) * Math::PI / 180 | |
#lat2 = (p2['y'] || p2['lat']) * Math::PI / 180 | |
#lon1 = (p1['x'] || p1['lng']) * Math::PI / 180 | |
#lon2 = (p2['x'] || p2['lng']) * Math::PI / 180 | |
lat1 = p1.lat * Math::PI / 180 | |
lat2 = p2.lat * Math::PI / 180 | |
lon1 = p1.lng * Math::PI / 180 | |
lon2 = p2.lng * Math::PI / 180 | |
lat_diff = (lat1 - lat2) | |
lon_diff = (lon1 - lon2) | |
radius = 6378100 # Mean radius of Earth in meters | |
# Calculate Haversine | |
a = Math.sin(lat_diff/2) * Math.sin(lat_diff/2) + | |
Math.cos(lat1) * Math.cos(lat2) * Math.sin(lon_diff/2) * Math.sin(lon_diff/2) | |
c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)) | |
return radius * c | |
end | |
end | |
class Cluster | |
attr_accessor :points, :center, :label | |
def initialize(center=nil, label=nil) | |
@center = center | |
@label = label | |
@points = [] | |
end | |
end | |
class ClusterPoint | |
attr_accessor :lat, :lng, :cluster_label, :point | |
def initialize(x, y, point=nil) | |
@lng, @lat = x, y | |
@cluster_label = nil | |
@point = point | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment