# chef/cookbooks/ceph/recipes/osd.rb
#
# Author:: Kyle Bader <kyle.bader@dreamhost.com>
# Cookbook Name:: ceph
# Recipe:: osd
#
# Copyright 2011, DreamHost Web Hosting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this recipe allows bootstrapping new osds, with help from mon
# Sample environment:
# #knife node edit ceph1
#"osd_devices": [
# {
# "device": "/dev/sdc"
# },
# {
# "device": "/dev/sdd",
# "dmcrypt": true,
# "journal": "/dev/sdd"
# }
#]
# Bring in the base ceph setup, the server bits and the rendered ceph.conf
# before any OSD work happens (order preserved from the original listing).
%w[default server conf].each do |recipe|
  include_recipe "ceph::#{recipe}"
end

# gdisk is installed up front — presumably required by ceph-disk for GPT
# partitioning; it is not referenced directly in this recipe. TODO confirm.
package "gdisk"

# Init style ("systemd", "upstart", ...) drives how services are handled below.
service_type = node["ceph"]["osd"]["init_style"]

# Monitor nodes that already carry a bootstrap-osd secret; at least one is
# required to create our local bootstrap keyring.
mons = get_mon_nodes("ceph_bootstrap-osd-secret:*")
if mons.empty?
  Chef::Log.fatal("No ceph-mon found")
  raise "No ceph-mon found"
elsif mons[0]["ceph"]["bootstrap-osd-secret"].empty?
  # NOTE(review): only logs fatal without raising, so the run continues and
  # skips OSD deployment — presumably a later converge retries once the
  # monitor has generated the key. Confirm this best-effort behavior is intended.
  Chef::Log.fatal("No authorization keys found")
else
  # These directories are all created by the ceph packages on SUSE distros.
  # TODO: Check if this is true for other distros (it probably is)
  ["tmp", "osd", "bootstrap-osd"].each do |name|
    directory "/var/lib/ceph/#{name}" do
      owner "ceph"
      group "ceph"
      mode "0750"
      recursive true
      action :create
    end
  end unless node["platform_family"] == "suse"

  # TODO cluster name
  cluster = "ceph"

  osd_secret = mons[0]["ceph"]["bootstrap-osd-secret"]
  # NOTE(review): the secret is interpolated into the command line, so it is
  # briefly visible in the process list while ceph-authtool runs.
  execute "create bootstrap-osd keyring" do
    command "ceph-authtool '/var/lib/ceph/bootstrap-osd/#{cluster}.keyring' --create-keyring --name=client.bootstrap-osd --add-key='#{osd_secret}'"
  end

  if is_crowbar?
    # Track whether any node attribute changed so we only node.save once.
    dirty = false
    node.set["ceph"]["osd_devices"] = [] if node["ceph"]["osd_devices"].nil?

    # Disk sizes from the barclamp inventory are in 512-byte blocks,
    # hence GB * 1024 * 1024 * 2.
    min_size_blocks = node["ceph"]["osd"]["min_size_gb"] * 1024 * 1024 * 2
    unclaimed_disks = BarclampLibrary::Barclamp::Inventory::Disk.unclaimed(node).sort.select { |d| d.size >= min_size_blocks }

    # if devices for journal are explicitly listed, do not use automatic journal assigning to SSD
    unless node["ceph"]["osd"]["journal_devices"].empty?
      # explicit comparison because we don't want a condition that uses nil
      if node["ceph"]["osd"]["use_ssd_for_journal"] != false
        node.set["ceph"]["osd"]["use_ssd_for_journal"] = false
        dirty = true
      end
    end

    # If no OSDs have yet been deployed, check what type of disks are available.
    # If they are all of one type, turn off automatic journal assigning to SSD
    # (automatic SSD journals only makes sense if there's a mix of disk types).
    # Note: this also effectively disables SSD journal assignment if there's
    # only one disk available (can't have a mix of disks if there's only one
    # disk!)
    if node["ceph"]["osd_devices"].empty? && unclaimed_disks.any?
      has_ssds = unclaimed_disks.any? { |d| node[:block_device][d.name.gsub("/dev/", "")]["rotational"] == "0" }
      has_hdds = unclaimed_disks.any? { |d| node[:block_device][d.name.gsub("/dev/", "")]["rotational"] == "1" }
      use_ssd_for_journal = has_ssds && has_hdds
      if node["ceph"]["osd"]["use_ssd_for_journal"] != use_ssd_for_journal
        node.set["ceph"]["osd"]["use_ssd_for_journal"] = use_ssd_for_journal
        dirty = true
      end
    end

    # Decide which of the unclaimed disks to claim for ceph.
    if node["ceph"]["disk_mode"] == "first" && node["ceph"]["osd_devices"].empty?
      if unclaimed_disks.empty?
        Chef::Log.fatal("There are no suitable disks for ceph")
        raise "There are no suitable disks for ceph"
      else
        # take first available disk, regardless of whether it's an SSD or not
        # (use_ssd_for_journal doesn't make sense if you're only trying to claim
        # one disk)
        disk_list = [unclaimed_disks.first]
      end
    elsif node["ceph"]["disk_mode"] == "all"
      disk_list = unclaimed_disks
    else
      disk_list = []
    end

    # Now, we have the final list of devices to claim, so claim them.
    # (Was "select"; the return value is never used — this is a side-effecting
    # iteration, so "each" is the honest construct.)
    disk_list.each do |d|
      if d.claim("Ceph")
        Chef::Log.info("Ceph: Claimed #{d.name}")
        device = {}
        dev_name = d.name.gsub("/dev/", "")
        if node["ceph"]["osd"]["journal_devices"].include?(d.name) ||
            (node[:block_device][dev_name]["rotational"] == "0" &&
             node["ceph"]["osd"]["use_ssd_for_journal"] &&
             node["ceph"]["disk_mode"] == "all")
          # Disk marked as journal if explicitly specified in journal_devices,
          # or if disk is SSD, and use_ssd_for_journal and disk_mode == all.
          # Note: journal_devices with disk_mode == first probably doesn't work,
          # but if you know how to define journal_devices, you probably know
          # you don't want to only allocate one disk to ceph.
          Chef::Log.info("Ceph: Mark #{d.name} as journal")
          device["status"] = "journal"
        end
        device["device"] = d.name
        node.set["ceph"]["osd_devices"].push(device)
        dirty = true
      else
        Chef::Log.info("Ceph: Ignoring #{d.name}")
      end
    end

    # Calling ceph-disk-prepare is sufficient for deploying an OSD
    # After ceph-disk-prepare finishes, the new device will be caught
    # by udev which will run ceph-disk-activate on it (udev will map
    # the devices if dm-crypt is used).
    # IMPORTANT:
    # - Always use the default path for OSD (i.e. /var/lib/ceph/
    #   osd/$cluster-$id)
    # - $cluster should always be ceph
    # - The --dmcrypt option will be available starting w/ Cuttlefish
    unless disk_list.empty?
      ssd_devices = node["ceph"]["osd_devices"].select { |d| d["status"] == "journal" }
      # How many data disks share each SSD journal. The empty case is guarded
      # explicitly instead of the old "rescue 1" modifier, which swallowed the
      # ZeroDivisionError (and any other StandardError) from this expression.
      partitions_per_ssd =
        if ssd_devices.empty?
          1
        else
          (disk_list.size - ssd_devices.size) / ssd_devices.size
        end
      ssd_index = 0
      ssd_partitions = 1
      node["ceph"]["osd_devices"].each_with_index do |osd_device, index|
        unless osd_device["status"].nil?
          # Already handled (journal or deployed) on a previous run.
          # (Was bare "Log.info", which is an undefined constant in a recipe.)
          Chef::Log.info("osd: osd_device #{osd_device['device']} has already been set up.")
          next
        end
        create_cmd = "ceph-disk prepare --cluster '#{cluster}' --journal-dev --zap-disk '#{osd_device['device']}'"
        # Stays nil when there are no SSD journals; persisted below either way.
        journal_device = nil
        unless ssd_devices.empty?
          ssd_device = ssd_devices[ssd_index]
          journal_device = ssd_device["device"]
          create_cmd = create_cmd + " #{journal_device}" if journal_device
          # move to next free SSD if number of partitions on current one is too big
          ssd_partitions = ssd_partitions + 1
          if ssd_partitions > partitions_per_ssd && ssd_devices[ssd_index + 1]
            # NOTE(review): resetting to 0 (vs. the initial value of 1 above)
            # gives every SSD after the first one extra partition — confirm
            # whether this off-by-one is intended.
            ssd_partitions = 0
            ssd_index = ssd_index + 1
          end
        end
        if node["platform_family"] == "rhel"
          # redhat has buggy udev so we have to use workaround from ceph
          b_dev = osd_device["device"].gsub("/dev/", "")
          create_cmd = create_cmd + " && ceph-disk-udev 2 #{b_dev}2 #{b_dev} ; ceph-disk-udev 1 #{b_dev}1 #{b_dev}"
        else
          extra_options = ""
          extra_options = "--mark-init systemd" if service_type == "systemd"
          create_cmd = create_cmd + " && ceph-disk activate #{extra_options} -- '#{osd_device['device']}1'"
        end
        execute "Activating Ceph OSD on #{osd_device['device']}" do
          command create_cmd
          action :run
        end
        # Poll (up to 10 minutes) until the freshly activated OSD reports an ID.
        # NOTE(review): assumes get_osd_id returns "" (not nil) while the OSD
        # is still coming up — a nil would raise NoMethodError here; verify.
        ruby_block "Get Ceph OSD ID for #{osd_device['device']}" do
          block do
            require "timeout"
            begin
              Timeout.timeout(600) do
                osd_id = ""
                while osd_id.empty?
                  osd_id = get_osd_id(osd_device["device"])
                  sleep 1
                end
              end
            rescue Timeout::Error
              message = "Cannot fetch OSD ID for #{osd_device["device"]}!"
              Chef::Log.fatal(message)
              raise message
            end
          end
        end
        # Persist deployment state and journal assignment on the node object.
        if node["ceph"]["osd_devices"][index]["status"] != "deployed"
          node.set["ceph"]["osd_devices"][index]["status"] = "deployed"
          dirty = true
        end
        # if journal_device is nil, this will still work as expected
        if node["ceph"]["osd_devices"][index]["journal"] != journal_device
          node.set["ceph"]["osd_devices"][index]["journal"] = journal_device
          dirty = true
        end
        # No need to specifically enable ceph-osd@N on systemd systems, as this
        # is done automatically by ceph-disk-activate
      end
      service "ceph_osd" do
        case service_type
        when "upstart"
          service_name "ceph-osd-all-starter"
          provider Chef::Provider::Service::Upstart
        else
          service_name "ceph"
        end
        action [:enable, :start]
        supports restart: true
        subscribes :restart, resources(template: "/etc/ceph/ceph.conf")
      end unless service_type == "systemd"
      # In addition to the osd services, ceph targets must be enabled when using systemd
      if service_type == "systemd"
        service "ceph-osd.target" do
          action :enable
        end
        service "ceph.target" do
          action :enable
        end
      end
    end
    node.save if dirty
  end
end