lib/hdo/one_dimensional_hierarchical_clusterer.rb
module Hdo
class OneDimensionalHierarchicalClusterer
attr_reader :clusters
def initialize(points, separation)
@points = points
@separation = separation
calculate_clustering
end
def nearest_cluster_for(point)
@clusters.reduce do |nearest, cluster|
distance_between(cluster, point) < distance_between(nearest, point) ? cluster : nearest
end
end
private
def calculate_clustering
@clusters = @points
return if @clusters.count <= 1
min_distance, closest_pair = find_key_of_min_value_in distances_matrix_for @clusters
while @clusters.count > 1 && min_distance < @separation && @separation != 0
min_distance, clusters = pair_nearest_in @clusters.dup
@clusters = clusters if min_distance < @separation
end
end
def pair_nearest_in(clusters)
distance, nearest_pair_indices = find_key_of_min_value_in distances_matrix_for clusters
new_cluster = clusters.select do |p|
p == clusters[nearest_pair_indices[0]] or p == clusters[nearest_pair_indices[1]]
end
clusters.delete_at clusters.index new_cluster[0]
clusters.delete_at clusters.index new_cluster[1]
[distance, clusters << new_cluster.flatten]
end
def find_key_of_min_value_in(hash)
min_key = nil
hash.each do |k,v|
min_key = k if hash[min_key] == nil || v < hash[min_key]
end
[hash[min_key], min_key]
end
def distances_matrix_for(clusters)
distances = Hash.new
clusters.each_with_index do |cluster, i|
clusters[(i+1)..-1].each_with_index do |other, j|
distances[[i,j+i+1]] = distance_between cluster,other
end
end
distances
end
def distance_between(cluster,other)
if Array === cluster
if Array === other
distance_between_array_and_array cluster, other
else
distance_between_array_and_point cluster, other
end
else
if Array === other
distance_between_array_and_point other, cluster
else
distance_between_point_and_point other, cluster
end
end
end
def distance_between_point_and_point(a,b)
Math.sqrt((a - b) ** 2)
end
def distance_between_array_and_point(array, point)
array.inject(Float::INFINITY) do |min_distance, array_point|
d = distance_between_point_and_point point, array_point
d < min_distance ? d : min_distance
end
end
def distance_between_array_and_array(array, other)
array.inject(Float::INFINITY) do |min_distance, array_point|
d = distance_between_array_and_point other, array_point
d < min_distance ? d : min_distance
end
end
end
end