# lib/inventory_refresh/inventory_collection/helpers/initialize_helper.rb
require_relative "../helpers"
module InventoryRefresh
class InventoryCollection
module Helpers
module InitializeHelper
# @param association [Symbol] A Rails association callable on a :parent attribute is used for comparing with the
# objects in the DB, to decide if the InventoryObjects will be created/deleted/updated or used for obtaining
# the data from a DB, if a DB strategy is used. It returns objects of the :model_class class or its sub STI.
# @param model_class [Class] A class of an ApplicationRecord model, that we want to persist into the DB or load from
# the DB.
# @param name [Symbol] A unique name of the InventoryCollection under a Persister. If not provided, the :association
# attribute is used. If :association is nil as well, the :name will be inferred from the :model_class.
# @param parent [ApplicationRecord] An ApplicationRecord object that has a callable :association method returning
# the objects of a :model_class.
def init_basic_properties(association, model_class, name, parent)
  # The identifying inputs are stored exactly as given.
  @model_class = model_class
  @association = association

  # Name resolution order: the explicit name, then the association, and only as a last resort a name
  # derived from the model class (String#demodulize / #tableize are not core Ruby - presumably ActiveSupport).
  @name = name
  @name ||= association
  @name ||= model_class.to_s.demodulize.tableize

  # Any falsy parent (nil or false) is stored as nil.
  @parent = parent || nil
end
# @param strategy [Symbol] A strategy of the InventoryCollection that will be used for saving/loading of the
# InventoryObject objects.
# Allowed strategies are:
# - nil => InventoryObject objects of the InventoryCollection will be saved to the DB, only these objects
# will be referable from the other InventoryCollection objects.
# - :local_db_cache_all => Loads InventoryObject objects from the database, it loads all the objects that
# are a result of a [<:parent>.<:association>, :arel] taking
# first defined in this order. This strategy will not save any objects in the DB.
# - :local_db_find_references => Loads InventoryObject objects from the database, it loads only objects that
# were referenced by the other InventoryCollections using a filtered result
# of a [<:parent>.<:association>, :arel] taking first
# defined in this order. This strategy will not save any objects in the DB.
# - :local_db_find_missing_references => InventoryObject objects of the InventoryCollection will be saved to
# the DB. Then if we reference an object that is not present, it will
# load them from the db using :local_db_find_references strategy.
# @param saver_strategy [Symbol] A strategy that will be used for InventoryCollection persisting into the DB.
# Allowed saver strategies are:
# - :default => Using Rails saving methods, this way is not safe to run in multiple workers concurrently,
# since it will lead to non consistent data.
# - :batch => Using batch SQL queries, this way is not safe to run in multiple workers
# concurrently, since it will lead to non consistent data.
# - :concurrent_safe_batch => It uses atomic upsert to avoid data duplication and it uses timestamp based
# atomic checks to avoid new data being overwritten by the old data. The upsert/update queries are
# executed as batched SQL queries, instead of sending 1 query per record.
# @param retention_strategy [Symbol] A retention strategy for this collection. Allowed values are:
# - :destroy => Will destroy the inactive records.
# - :archive => Will archive the inactive records by setting :archived_at timestamp.
# @param delete_method [Symbol] A delete method that will be used for deleting of the InventoryObject, if the
# object is marked for deletion. A default is :destroy, the instance method must be defined on the
# :model_class.
def init_strategies(strategy, saver_strategy, retention_strategy, delete_method)
  # Each strategy is validated/normalized by its process_* helper; the saver strategy is resolved first,
  # then the collection strategy, then the retention strategy.
  @saver_strategy = process_saver_strategy(saver_strategy)
  @strategy = process_strategy(strategy)
  @retention_strategy = process_retention_strategy(retention_strategy)

  # :destroy is used when no delete method was passed.
  @delete_method = delete_method
  @delete_method ||= :destroy
end
# @param manager_ref [Array] Array of Symbols, that are keys of the InventoryObject's data, inserted into this
# InventoryCollection. Using these keys, we need to be able to uniquely identify each of the InventoryObject
# objects inside.
# @param manager_ref_allowed_nil [Array] Array of symbols having manager_ref columns, that are a foreign key and can
# be nil. Given the table are shared by many providers, it can happen, that the table is used only partially.
# Then it can happen we want to allow certain foreign keys to be nil, while being sure the referential
# integrity is not broken. Of course the DB Foreign Key can't be created in this case, so we should try to
# avoid this usecase by a proper modeling.
# Note that InventoryObject's data has to be built with <foreign_key> => nil, it means that key cannot be missing!
# @param secondary_refs [Hash] TODO
# @param manager_uuids [Array|Proc] Array of manager_uuids of the InventoryObjects we want to create/update/delete. Using
# this attribute, the db_collection_for_comparison will be automatically limited by the manager_uuids, in a
# case of a simple relation. In a case of a complex relation, we can leverage :manager_uuids in a
# custom :targeted_arel. We can pass also lambda, for lazy_evaluation.
def init_references(manager_ref, manager_ref_allowed_nil, secondary_refs, manager_uuids)
  # Take the arguments as given first ...
  @manager_ref = manager_ref
  @manager_ref_allowed_nil = manager_ref_allowed_nil
  @secondary_refs = secondary_refs
  @manager_uuids = manager_uuids

  # ... then replace anything nil/false with its default: [:ems_ref] for the manager_ref,
  # empty containers for everything else.
  @manager_ref ||= [:ems_ref]
  @manager_ref_allowed_nil ||= []
  @secondary_refs ||= {}
  @manager_uuids ||= []
end
# @param all_manager_uuids [Array] Array of all manager_uuids of the InventoryObjects. With the :targeted true,
# having this parameter defined will invoke only :delete_method on a complement of this set, making sure
# the DB has only this set of data after. This :attribute serves for deleting of top level
# InventoryCollections, i.e. InventoryCollections having parent_inventory_collections nil. The deleting of
# child collections is already handled by the scope of the parent_inventory_collections and using Rails
# :dependent => :destroy.
# @param all_manager_uuids_scope [Array] A scope limiting the :all_manager_uuids parameter. E.g. we can send
# all_manager_uuids for 1 region, leading to delete a complement of the entities just under that 1
# region.
# If all_manager_uuids_scope is used with :all_manager_uuids => nil, it will do delete_complement of the
# scope itself. E.g. sending a list of all active regions, we will delete complement entities not
# belonging to those regions.
# Example:
# :all_manager_uuids => [{:source_ref => x}, {:source_ref => y}],
# :all_manager_uuids_scope => [{:region => regions.lazy_find(X)}, {:region => regions.lazy_find(Y)}]
#
# Will cause deletion/archival of all entities that don't have source_ref "x" or "y", but only under
# regions X and Y.
# @param all_manager_uuids_timestamp [String] A timestamp in UTC marking a time before we collected all of the
# all_manager_uuids. Meaning we won't be archiving any newer entities.
def init_all_manager_uuids(all_manager_uuids, all_manager_uuids_scope, all_manager_uuids_timestamp)
  # TODO(lsmola) Should we refactor this to use references too?
  # All three values are stored untouched; no defaults or validation are applied here.
  @all_manager_uuids, @all_manager_uuids_scope = all_manager_uuids, all_manager_uuids_scope
  @all_manager_uuids_timestamp = all_manager_uuids_timestamp
end
# @param dependency_attributes [Hash] Manually defined dependencies of this InventoryCollection. We can use this
# to manually place the InventoryCollection into the graph, to make sure the saving is invoked after the
# dependencies were saved. The dependencies themselves are InventoryCollection objects. For common use-cases
# we do not need to define dependencies manually, since those are inferred automatically by scanning of the
# data.
#
# Example:
# :dependency_attributes => {
# :orchestration_stacks => [collections[:orchestration_stacks]],
# :orchestration_stacks_resources => [collections[:orchestration_stacks_resources]]
# }
# This example is used in Example2 of the <param custom_save_block> and it means that our :custom_save_block
# will be invoked after the InventoryCollection :orchestration_stacks and :orchestration_stacks_resources
# are saved.
# @param parent_inventory_collections [Array] Array of symbols having a name pointing to the
# InventoryRefresh::InventoryCollection objects, that serve as parents to this InventoryCollection. There are
# several scenarios to consider, when deciding if InventoryCollection has parent collections, see the example.
#
# Example:
# taking inventory collections :vms and :disks (local disks), if we write that:
# inventory_collection = InventoryCollection.new({
# :model_class => ::Disk,
# :association => :disks,
# :manager_ref => [:vm, :location],
# :parent_inventory_collections => [:vms],
# })
#
# Then the decision for having :parent_inventory_collections => [:vms] was probably driven by these
# points:
# 1. We can get list of all disks only by doing SQL query through the parent object (so there will be join
# from vms to disks table).
# 2. There is no API query for getting all disks from the provider API, we get them inside VM data, or as
# a Vm subquery
# 3. Part of the manager_ref of the IC is the VM object (foreign key), so the disk's location is unique
# only under 1 Vm. (In current models, this is modeled going through Hardware model)
# 4. In targeted refresh, we always expect that each Vm will be saved with all its disks.
#
# Then having the above points, adding :parent_inventory_collections => [:vms], will bring these
# implications:
# 1. By archiving/deleting Vm, we can no longer see the disk, because those were owned by the Vm. Any
# archival/deletion of the Disk model, must be then done by cascade delete/hooks logic.
# 2. Having Vm as a parent ensures we always process it first. So e.g. when providing no Vms for saving
# we would have no graph dependency (no data --> no edges --> no dependencies) and Disk could be
# archived/removed before the Vm, while we always want to archive the VM first.
# 3. For targeted refresh, we always expect that all disks are saved with a VM. So for targeting :disks,
# we are not using #manager_uuids attribute, since the scope is "all disks of all targeted VMs", so we
# always use #manager_uuids of the parent. (that is why :parent_inventory_collections and
# :manager_uuids are mutually exclusive attributes)
# 4. For automatically building the #targeted_arel query, we need the parent to know what is the root node.
# While this information can be introspected from the data, it creates a scope for create&update&delete,
# which means it has to work with no data provided (causing delete all). So with no data we cannot
# introspect anything.
def init_ic_relations(dependency_attributes, parent_inventory_collections = nil)
  # No manually declared dependencies (nil/false) means an empty Hash.
  @dependency_attributes = dependency_attributes
  @dependency_attributes ||= {}

  # Always starts as an empty Set; nothing in this method fills it.
  @dependees = Set[]

  # Stored as given, nil included.
  @parent_inventory_collections = parent_inventory_collections
end
# @param complete [Boolean] By default true, :complete is marking we are sending a complete dataset and therefore
# we can create/update/delete the InventoryObject objects. If :complete is false we will only do
# create/update without delete.
# @param create_only [Boolean] TODO
# @param check_changed [Boolean] By default true. If true, before updating the InventoryObject, we call Rails
# 'changed?' method. This can optimize speed of updates heavily, but it can fail to recognize the change for
# e.g. Ancestry and Relationship based columns. If false, we always update the InventoryObject.
# @param update_only [Boolean] By default false. If true we only update the InventoryObject objects, if false we do
# create/update/delete.
# @param use_ar_object [Boolean] True or False. Whether we need to initialize an AR object as part of the saving;
# it's needed if the model has special setters, serialization of columns, etc. This setting is relevant only
# for the batch saver strategy.
# @param targeted [Boolean] True if the collection is targeted, in that case it will be leveraging :manager_uuids,
# :parent_inventory_collections and :targeted_arel to save a subgraph of a data.
# @param assert_graph_integrity [Boolean] By default true. NOTE(review): how this flag is consumed is not visible
# in this helper - confirm against the code that reads it.
def init_flags(complete, create_only, check_changed,
               update_only, use_ar_object, targeted,
               assert_graph_integrity)
  # Only nil selects the default, so an explicitly passed false is preserved.
  or_default = ->(value, default) { value.nil? ? default : value }

  # Always reset to false here, regardless of the arguments.
  @saved = false

  @complete = or_default.call(complete, true)
  @create_only = or_default.call(create_only, false)
  @check_changed = or_default.call(check_changed, true)
  @update_only = or_default.call(update_only, false)

  # Unlike the flags above, any falsy value (not just nil) becomes false; truthy values are kept as passed.
  @use_ar_object = use_ar_object || false
  # Coerced to a strict true/false.
  @targeted = targeted ? true : false

  @assert_graph_integrity = or_default.call(assert_graph_integrity, true)
end
# @param attributes_blacklist [Array] Attributes we do not want to include into saving. We cannot blacklist an
# attribute that is needed for saving of the object.
# Note: attributes_blacklist is also used for internal resolving of the cycles in the graph.
#
# In the Example2 of the <param custom_save_block>, we have a custom saving code, that saves a :parent
# attribute of the OrchestrationStack. That means we don't want that attribute saved as a part of
# InventoryCollection for OrchestrationStack, so we would set :attributes_blacklist => [:parent]. Then the
# :parent will be ignored while saving.
# @param attributes_whitelist [Array] Same usage as the :attributes_blacklist, but defining full set of attributes
# that should be saved. Attributes that are part of :manager_ref and needed validations are automatically
# added.
# @param inventory_object_attributes [Array] Array of attribute names that will be exposed as readers/writers on the
# InventoryObject objects inside.
#
# Example: Given
# inventory_collection = InventoryCollection.new({
# :model_class => ::Vm,
# :arel => @ems.vms,
# :inventory_object_attributes => [:name, :label]
# })
# And building the inventory_object like:
# inventory_object = inventory_collection.build(:ems_ref => "vm1", :name => "vm1")
# We can use inventory_object_attributes as setters and getters:
# inventory_object.name = "Name"
# inventory_object.label = inventory_object.name
# Which would be equivalent to the less nice way:
# inventory_object[:name] = "Name"
# inventory_object[:label] = inventory_object[:name]
# So by using inventory_object_attributes, we will be guarding the allowed attributes and will have an
# explicit list of allowed attributes, that can be used also for documentation purposes.
# @param batch_extra_attributes [Array] Array of symbols marking which extra attributes we want to store into the
# db. These extra attributes might be a product of :use_ar_object assignment and we need to specify them
# manually, if we want to use a batch saving strategy and we have models that populate attributes as a side
# effect.
def init_model_attributes(attributes_blacklist, attributes_whitelist,
                          inventory_object_attributes, batch_extra_attributes)
  @inventory_object_attributes = inventory_object_attributes
  @batch_extra_attributes = batch_extra_attributes || []
  @internal_attributes = [:__feedback_edge_set_parent, :__parent_inventory_collections, :__all_manager_uuids_scope]
  @transitive_dependency_attributes = Set[]

  # Both lists start out empty; the caller-supplied values are applied afterwards through the bang helpers,
  # so every instance variable above is already in place when those helpers run.
  @attributes_blacklist = Set[]
  @attributes_whitelist = Set[]

  if attributes_blacklist.present?
    blacklist_attributes!(attributes_blacklist)
  end
  if attributes_whitelist.present?
    whitelist_attributes!(attributes_whitelist)
  end
end
# Builds the storage objects of this collection. Reads @secondary_refs and @manager_uuids, which
# init_references assigns.
def init_storages
  collection_class = ::InventoryRefresh::InventoryCollection

  # DataStorage is built from this collection itself and its @secondary_refs.
  @data_storage = collection_class::DataStorage.new(self, @secondary_refs)
  @references_storage = collection_class::ReferencesStorage.new(index_proxy)

  # A second, separate ReferencesStorage seeded with @manager_uuids; @targeted_scope is whatever merge!
  # returns (presumably the storage itself - confirm in ReferencesStorage).
  targeted = collection_class::ReferencesStorage.new(index_proxy)
  @targeted_scope = targeted.merge!(@manager_uuids)
end
# @param arel [ActiveRecord::Associations::CollectionProxy|Arel::SelectManager] Instead of :parent and :association
# we can provide Arel directly to say what records should be compared to check if InventoryObject will be
# doing create/update/delete.
#
# Example:
# for a targeted refresh, we want to delete/update/create only a list of vms specified with a list of
# ems_refs:
# :arel => manager.vms.where(:ems_ref => manager_refs)
# Then we want to do the same for the hardwares of only those vms:
# :arel => manager.hardwares.joins(:vm_or_template).where(
# 'vms' => {:ems_ref => manager_refs}
# )
# And etc. for the other Vm related records.
# @param targeted_arel [Proc] A callable block that receives this InventoryCollection as a first argument. In there
# we can leverage a :parent_inventory_collections or :manager_uuids to limit the query based on the
# manager_uuids available.
# Example:
# targeted_arel = lambda do |inventory_collection|
# # Getting ems_refs of parent :vms and :miq_templates
# manager_uuids = inventory_collection.parent_inventory_collections.collect(&:manager_uuids).flatten
# inventory_collection.db_collection_for_comparison.hardwares.joins(:vm_or_template).where(
# 'vms' => {:ems_ref => manager_uuids}
# )
# end
#
# inventory_collection = InventoryCollection.new({
# :model_class => ::Hardware,
# :association => :hardwares,
# :parent_inventory_collections => [:vms, :miq_templates],
# :targeted_arel => targeted_arel,
# })
def init_arels(arel, targeted_arel)
  # Both values are stored exactly as passed; no defaults are applied.
  @arel, @targeted_arel = arel, targeted_arel
  # Return the targeted arel, as the plain assignment would.
  @targeted_arel
end
# @param custom_save_block [Proc] A custom lambda/proc for persisting in the DB, for cases where it's not enough
# to just save every InventoryObject inside by the defined rules and default saving algorithm.
#
# Example1 - saving SomeModel in my own ineffective way :-) :
#
# custom_save = lambda do |_ems, inventory_collection|
# inventory_collection.each do |inventory_object|
# hash = inventory_object.attributes # Loads possible dependencies into saveable hash
# obj = SomeModel.find_by(:attr => hash[:attr]) # Note: doing find_by for many models produces N+1
# # queries, avoid this, this is just a simple example :-)
# obj.update(hash) if obj
# obj ||= SomeModel.create(hash)
# inventory_object.id = obj.id # If this InventoryObject is referenced elsewhere, we need to store its
# primary key back to the InventoryObject
# end
# end
#
# Example2 - saving parent OrchestrationStack in a more effective way, than the default saving algorithm can
# achieve. Ancestry gem requires an ActiveRecord object for association and is not defined as a proper
# ActiveRecord association. That leads to N+1 queries in the default saving algorithm, so we can do better
# with custom saving for now. The InventoryCollection is defined as a custom dependencies processor,
# without its own :model_class and InventoryObjects inside:
#
# InventoryRefresh::InventoryCollection.new({
# :association => :orchestration_stack_ancestry,
# :custom_save_block => orchestration_stack_ancestry_save_block,
# :dependency_attributes => {
# :orchestration_stacks => [collections[:orchestration_stacks]],
# :orchestration_stacks_resources => [collections[:orchestration_stacks_resources]]
# }
# })
#
# And the lambda is defined as:
#
# orchestration_stack_ancestry_save_block = lambda do |_ems, inventory_collection|
# stacks_inventory_collection = inventory_collection.dependency_attributes[:orchestration_stacks].try(:first)
#
# return if stacks_inventory_collection.blank?
#
# stacks_parents = stacks_inventory_collection.data.each_with_object({}) do |x, obj|
# parent_id = x.data[:parent].load.try(:id)
# obj[x.id] = parent_id if parent_id
# end
#
# model_class = stacks_inventory_collection.model_class
#
# stacks_parents_indexed = model_class
# .select([:id, :ancestry])
# .where(:id => stacks_parents.values).find_each.index_by(&:id)
#
# model_class
# .select([:id, :ancestry])
# .where(:id => stacks_parents.keys).find_each do |stack|
# parent = stacks_parents_indexed[stacks_parents[stack.id]]
# stack.update_attribute(:parent, parent)
# end
# end
# @param custom_reconnect_block [Proc] A custom lambda for reconnect logic of previously disconnected records
#
# Example - Reconnect disconnected Vms
# InventoryRefresh::InventoryCollection.new({
# :association => :orchestration_stack_ancestry,
# :custom_reconnect_block => vms_custom_reconnect_block,
# })
#
# And the lambda is defined as:
#
# vms_custom_reconnect_block = lambda do |inventory_collection, inventory_objects_index, attributes_index|
# inventory_objects_index.each_slice(1000) do |batch|
# Vm.where(:ems_ref => batch.map(&:second).map(&:manager_uuid)).each do |record|
# index = inventory_collection.object_index_with_keys(inventory_collection.manager_ref_to_cols, record)
#
# # We need to delete the record from the inventory_objects_index and attributes_index, otherwise it
# # would be sent for create.
# inventory_object = inventory_objects_index.delete(index)
# hash = attributes_index.delete(index)
#
# record.assign_attributes(hash.except(:id, :type))
# if !inventory_collection.check_changed? || record.changed?
# record.save!
# inventory_collection.store_updated_records(record)
# end
#
# inventory_object.id = record.id
# end
# end
# end
def init_custom_procs(custom_save_block, custom_reconnect_block)
  # The callables are only stored here; nothing is invoked or validated.
  @custom_save_block, @custom_reconnect_block = custom_save_block, custom_reconnect_block
  # Return the reconnect block, as the plain assignment would.
  @custom_reconnect_block
end
# @param default_values [Hash] A hash of attributes that will be added to every inventory object created by
# inventory_collection.build(hash)
#
# Example: Given
# inventory_collection = InventoryCollection.new({
# :model_class => ::Vm,
# :arel => @ems.vms,
# :default_values => {:ems_id => 10}
# })
# And building the inventory_object like:
# inventory_object = inventory_collection.build(:ems_ref => "vm_1", :name => "vm1")
# The inventory_object.data will look like:
# {:ems_ref => "vm_1", :name => "vm1", :ems_id => 10}
def init_data(default_values)
  # An empty Hash is used when no default values (nil/false) were passed.
  @default_values = default_values
  @default_values ||= {}
end
# Resets the created/updated/deleted record lists to empty arrays.
def init_changed_records_stats
  # Three independent arrays - the block form of Array.new builds a new [] per slot.
  @created_records, @updated_records, @deleted_records = Array.new(3) { [] }
  # Return the deleted records list, as the plain assignment would.
  @deleted_records
end
# Processes passed saver strategy
#
# @param saver_strategy [Symbol] Passed saver strategy
# @return [Symbol] Returns back the passed strategy if supported, or raises exception
def process_saver_strategy(saver_strategy)
  # Nothing passed (nil/false) selects the :default saver.
  return :default unless saver_strategy

  # Strings are accepted too; everything is compared as a Symbol.
  requested = saver_strategy.to_sym
  return requested if [:default, :batch, :concurrent_safe_batch].include?(requested)

  raise "Unknown InventoryCollection saver strategy: :#{requested}, allowed strategies are "\
        ":default, :batch and :concurrent_safe_batch"
end
# Processes passed strategy, modifies :data_collection_finalized and :saved attributes for db only strategies
#
# @param strategy_name [Symbol] Passed strategy
# @return [Symbol] Returns back the passed strategy if supported, or raises exception
def process_strategy(strategy_name)
  # Every call first marks the data collection as not finalized, even when no strategy is given.
  self.data_collection_finalized = false
  return unless strategy_name

  # Strings are accepted too; everything is compared as a Symbol.
  requested = strategy_name.to_sym

  if requested == :local_db_cache_all
    # Flagged as both finalized and saved.
    self.data_collection_finalized = true
    self.saved = true
  elsif requested == :local_db_find_references
    # Flagged as saved only.
    self.saved = true
  elsif requested != :local_db_find_missing_references
    # :local_db_find_missing_references is valid but changes no flags; anything else is rejected.
    raise "Unknown InventoryCollection strategy: :#{requested}, allowed strategies are :local_db_cache_all, "\
          ":local_db_find_references and :local_db_find_missing_references."
  end

  requested
end
# Processes passed retention strategy
#
# @param retention_strategy [Symbol] Passed retention strategy
# @return [Symbol] Returns back the passed strategy if supported, or raises exception
def process_retention_strategy(retention_strategy)
  # No retention strategy passed (nil/false) yields nil - there is no default here.
  return unless retention_strategy

  # Strings are accepted too; everything is compared as a Symbol.
  requested = retention_strategy.to_sym
  unless [:destroy, :archive].include?(requested)
    raise "Unknown InventoryCollection retention strategy: :#{requested}, allowed strategies are "\
          ":destroy and :archive"
  end

  requested
end
end
end
end
end