cosmos/lib/cosmos/core_ext/array.rb
# encoding: ascii-8bit
# Copyright 2022 Ball Aerospace & Technologies Corp.
# All Rights Reserved.
#
# This program is free software; you can modify and/or redistribute it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation; version 3 with
# attribution addendums as found in the LICENSE.txt
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# This program may also be used under the terms of a commercial or
# enterprise edition license of COSMOS if purchased from the
# copyright holder
require 'cosmos/ext/array' if RUBY_ENGINE == 'ruby' and !ENV['COSMOS_NO_EXT']
# COSMOS specific additions to the Ruby Array class
class Array
# Redefine inspect to only print for small numbers of
# items. Prevents exceptions taking forever to be raise with
# large objects. See http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/105145
alias old_inspect inspect
# @param max_elements [Integer] The maximum number of elements in the array to
# print out before simply displaying the array class and object id
# @return [String] String representation of the array
def inspect(max_elements = 10)
if self.length <= max_elements
old_inspect()
else
'#<' + self.class.to_s + ':' + self.object_id.to_s + '>'
end
end
# @return [Array] Cloned array after all elements called to_f
def clone_to_f
new_array = self.class.new(0)
self.each do |value|
new_array << value.to_f
end
new_array
end
# Returns the array index nearest to the passed in value. This only makes sense
# for numerical arrays containing integers or floats. It has an optimized
# algorithm if the array is sorted but will fail if passed unsorted data with
# the sorted option.
#
# @param value [Numeric] A value to search for in the array
# @param ordered_data [Boolean] Whether or not the data is sorted
# @return [Integer] The index of the element closest to value
def nearest_index(value, ordered_data = true)
raise "Cannot search on empty array" if self.empty?
if ordered_data
last_index = self.length - 1
first_value = self[0].to_f
last_value = self[-1].to_f
return 0 if first_value == last_value
slope = last_index.to_f / (last_value - first_value)
offset = -(slope * first_value)
guess_index = ((slope * value.to_f) + offset).to_i
# Return immediately for boundary conditions
return 0 if guess_index < 0
return last_index if guess_index > last_index
# Verify guess index
previous_guess_index = nil
previous_guess_value = nil
# While in the valid range of indexes
while guess_index >= 0 and guess_index <= last_index
# Retrieve the value at our current guess index
guess_value = self[guess_index]
# We're done if we found the exact value
return guess_index if guess_value == value
if previous_guess_value # Determine if we did better or worse
# Was previous guess better or worse?
if (guess_value - value).abs <= (previous_guess_value - value).abs
# Previous Guess Worse or the same
if guess_value > value # Moving with decreasing indexes
if previous_guess_value > value # Still moving in right direction
previous_guess_index = guess_index
guess_index -= 1
else # We passed the value
return guess_index
end
else # guess_value < value and moving with increasing indexes
if previous_guess_value < value # Still moving in right direction
previous_guess_index = guess_index
guess_index += 1
else # We passed the value
return guess_index
end
end
else
# Previous Guess Better
return previous_guess_index
end
else # Move to the next point
previous_guess_index = guess_index
if guess_value > value
guess_index -= 1
else # guess_value < value
guess_index += 1
end
end
previous_guess_value = guess_value
end
# Return our best guess
return 0 if guess_index < 0
return last_index
else # Brute force search
# Calculate the initial delta
min_delta = (self[0] - value).abs
closest_index = 0
self.each_with_index do |self_value, index|
# Calculate the delta between the current value and value we are
# searching for
delta = (value - self_value).abs
# If the newly calculate delta is less than or equal to are previous
# minimum delta then we proceed
if delta <= min_delta
# There is a special case if the delta is equal to the previously
# calculated delta. We want to round up in this case so we check if
# the value we are searching for is greater than the current value.
# If so we skip this value since we don't want to round down.
next if (delta == min_delta) and (value > self_value)
min_delta = delta
closest_index = index
end
end
return closest_index
end
end
# Returns the index of the first element which is less than or equal to the
# passed in value.
#
# NOTE: This routine only works on sorted data!
#
# @param value [Numeric] The value to search for in the array
# @return [Fixnum] The index of the element which is less than or equal to
# the value
def index_lt_eq(value)
index = nearest_index(value)
# Keep backing up if self[index - 1] == value to move past duplicates
while index > 0 and self[index - 1] == value
index -= 1
end
return index if self[index] <= value
index -= 1 if index > 0
return index
end
# Returns the index of the last element which is greater than or equal to the
# passed in value.
#
# NOTE: This routine only works on sorted data!
#
# @param value [Numeric] The value to search for in the array
# @return [Fixnum] The index of the element which is greater than or equal to
# the value
def index_gt_eq(value)
index = nearest_index(value)
last_index = self.length - 1
# Keep moving forward if self[index - 1] == value to move past duplicates
while index < last_index and self[index + 1] == value
index += 1
end
return index if self[index] >= value
index += 1 if (self.length - 1) > index
return index
end
# Returns the range of array elements which contain both the start value and
# end value.
#
# NOTE: This routine only works on sorted data!
#
# @param start_value [Numeric] The start value to search for (must be less
# than end_value)
# @param end_value [Numeric] The end value to search for
# @return [Range] The range of array elements which contain both the
# start_value and end_value
def range_containing(start_value, end_value)
raise "end_value: #{end_value} must be greater than start_value: #{start_value}" if end_value < start_value
Range.new(index_lt_eq(start_value), index_gt_eq(end_value))
end
# Returns the range of array elements which within both the start value and
# end value.
#
# NOTE: This routine only works on sorted data!
#
# @param start_value [Numeric] The start value to search for (must be less
# than end_value)
# @param end_value [Numeric] The end value to search for
# @return [Range] The range of array elements which contain both the
# start_value and end_value
def range_within(start_value, end_value)
raise "end_value: #{end_value} must be greater than start_value: #{start_value}" if end_value < start_value
range = Range.new(index_gt_eq(start_value), index_lt_eq(end_value))
# Sometimes we get a backwards range so check for that and reverse it
range = Range.new(range.last, range.first) if range.last < range.first
range
end
if !(self.method_defined?(:sum))
# @return [Numeric] The sum of all the elements in the array
def sum
self.inject(0, :+)
end
end
# return [Float] The mean of all the elements in the array
def mean
return 0.0 if self.empty?
return self.sum / self.length.to_f
end
# return [Array] A new array with each value of the original squared
def squared
self.map { |value| value * value }
end
if RUBY_ENGINE != 'ruby' or ENV['COSMOS_NO_EXT']
# return [Numeric, Fixnum] The first maximum value and its index
def max_with_index
maximum = nil
maximum_index = nil
if self.length > 0
maximum = self[0]
maximum_index = 0
(1..(self.length - 1)).each do |index|
value = self[index]
if value > maximum
maximum = value
maximum_index = index
end
end
end
return [maximum, maximum_index]
end
# return [Numeric, Fixnum] The first minimum value and its index
def min_with_index
minimum = nil
minimum_index = nil
if self.length > 0
minimum = self[0]
minimum_index = 0
(1..(self.length - 1)).each do |index|
value = self[index]
if value < minimum
minimum = value
minimum_index = index
end
end
end
return [minimum, minimum_index]
end
end
# @param num_buckets [Integer] The number of buckets (groups of numbers) that
# will be used when histogramming. nil indicates to use as many buckets as
# necessary to cause each bucket to have a unique element.
# @param numeric [Boolean] Whether the array data is numeric
# @param block [Proc] If a block is given it will be called to sort buckets
# with the same object. This might be necessary if your data is not numeric
# and you want to override the way your objects compare.
# @return [Array<Array(first_value, last_value, num_values)>] Array of buckets
# which are arrays containing the first value that is found in the bucket,
# the last value found in the bucket, and the total number of values in the
# bucket.
def histogram(num_buckets = nil, numeric = false, &block)
buckets = {}
# Count the occurance of each value
self.each do |value|
buckets[value] ||= 0
buckets[value] += 1
end
# Sort buckets by value, use block for sorting if given
if block_given?
sorted_buckets = buckets.sort { |x, y| yield(x, y) }
else
sorted_buckets = buckets.sort
end
reduced_buckets = []
if num_buckets
# Validate num_buckets
raise "Invalid num_buckets #{num_buckets}" if num_buckets.to_i <= 0
# Handle histogram types
if numeric
# Numeric histograms use the same sized range for each bucket
first_value = sorted_buckets[0][0]
last_value = sorted_buckets[-1][0]
delta = last_value - first_value
bucket_size = delta.to_f / num_buckets.to_f
integers = false
integers = true if first_value.kind_of?(Integer) and last_value.kind_of?(Integer)
if integers
bucket_size = bucket_size.ceil
last_value = first_value + bucket_size * num_buckets - 1
delta = last_value - first_value
(delta + 1).times do |index|
buckets[first_value + index] ||= 0
end
if block_given?
sorted_buckets = buckets.sort { |val1, val2| yield(val1, val2) }
else
sorted_buckets = buckets.sort
end
end
bucket_ranges = []
current_value = first_value
num_buckets.times do |bucket_index|
if bucket_index == (num_buckets - 1)
bucket_ranges[bucket_index] = (current_value)..(last_value)
else
if integers
bucket_ranges[bucket_index] = (current_value)..(current_value + bucket_size - 1)
else
bucket_ranges[bucket_index] = (current_value)..(current_value + bucket_size)
end
end
current_value += bucket_size
end
# Build the final buckets
first_index = 0
sorted_index = 0
num_buckets.times do |bucket_index|
break if sorted_index > (sorted_buckets.length - 1)
sum = 0
bucket_range = bucket_ranges[bucket_index]
while bucket_range.include?(sorted_buckets[sorted_index][0])
sum += sorted_buckets[sorted_index][1]
sorted_index += 1
break if sorted_index > (sorted_buckets.length - 1)
end
reduced_buckets[bucket_index] = [bucket_range.first, bucket_range.last, sum]
end
else
# Non-numeric histograms use the same number of items per bucket
items_per_bucket = sorted_buckets.length / num_buckets.to_i
items_per_bucket = 1 if items_per_bucket < 1
bucket_sizes = [items_per_bucket] * num_buckets
excess_items = sorted_buckets.length - (items_per_bucket * num_buckets)
if excess_items > 0
bucket_sizes.length.times do |bucket_size_index|
break if excess_items <= 0
bucket_sizes[bucket_size_index] += 1
excess_items -= 1
end
end
# Build the final buckets
first_index = 0
num_buckets.times do |bucket_index|
break if first_index > (sorted_buckets.length - 1)
if bucket_index == (num_buckets - 1)
last_index = sorted_buckets.length - 1
else
last_index = first_index + bucket_sizes[bucket_index] - 1
last_index = sorted_buckets.length - 1 if last_index > (sorted_buckets.length - 1)
end
sum = 0
sorted_buckets[first_index..last_index].each { |key, value| sum += value }
reduced_buckets[bucket_index] = [sorted_buckets[first_index][0], sorted_buckets[last_index][0], sum]
first_index = first_index + bucket_sizes[bucket_index]
end
end
else
sorted_buckets.each { |key, value| reduced_buckets << [key, key, value] }
end
reduced_buckets
end
end # class Array