lib/kurchatov/monitor.rb
module Kurchatov
class Monitor
class Task
include Kurchatov::Mixin::Event
attr_accessor :thread, :instance
attr_reader :count_errors, :last_error, :last_error_at
def initialize(plugin)
@plugin = plugin
@thread = Thread.new { @plugin.start! }
@count_errors = 0
@last_error = nil
@last_error_at = nil
@last_error_count = 0
end
def name
@plugin.name
end
def config
@plugin.plugin_config
end
def stop!
Thread.kill(@thread)
end
def stopped?
@plugin.stopped?
end
def died?
if @thread.alive?
@last_error_count = 0
return false
end
# thread died, join and extract error
begin
@thread.join # call error
rescue => e
desc = "Plugin '#{@plugin.name}' died. #{e.class}: #{e}.\n" +
"Trace: #{e.backtrace.join("\n")}"
@count_errors += 1
@last_error_count += 1
@last_error = desc
@last_error_at = Time.now
Log.error(desc)
if @plugin.ignore_errors == false || (@plugin.ignore_errors.class == 'Fixnum' && @plugin.ignore_errors > @plugin.last_error_count)
event(:service => "plugin #{@plugin.name} errors", :desc => desc, :state => 'critical')
end
end
true
end
def start!
@thread = Thread.new { @plugin.start! }
end
end
attr_accessor :tasks
CHECK_ALIVE_TIMEOUT = 5
def initialize
@tasks = Array.new
end
def <<(plugin)
Log.debug("Add new plugin: #{plugin.inspect}")
tasks << Task.new(plugin)
end
def start!
loop do
tasks.each do |task|
task.start! if task.died?
if task.stopped?
task.stop!
tasks.delete(task)
end
end
Log.debug("Check alive plugins [#{tasks.count}]")
sleep CHECK_ALIVE_TIMEOUT
end
end
def inspect
tasks.map do |t|
{
"name" => t.name,
"config" => t.config,
"errors" => {"count" => t.count_errors, "last" => t.last_error, "time" => t.last_error_at}
}
end
end
end
end