crowbar/crowbar-hadoop

chef/cookbooks/hadoop/recipes/configure-disks.rb

#
# Cookbook Name: hadoop
# Recipe: configure-disks.rb
#
# Copyright (c) 2011 Dell Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: andi abes
#

#######################################################################
# Begin recipe transactions
#######################################################################
debug = node["hadoop"]["debug"]
# Find all the disks.
to_use_disks = []
found_disks = []
all_disks = node["crowbar"]["disks"]
all_disks.each do |k, v|
  to_use_disks << k if v["usage"] == "Storage"
end
Chef::Log.info("HADOOP: found disks: #{to_use_disks.join(':')}") if debug

dfs_base_dir = node[:hadoop][:hdfs][:dfs_base_dir]
# Walk over each of the disks, configuring it if we have to.
node[:hadoop][:devices] = []
node[:hadoop][:hdfs][:dfs_data_dir] = []
node[:hadoop][:mapred][:mapred_local_dir] = []
wait_for_format = false

def get_uuid(disk)
  uuid = nil
  IO.popen("blkid -c /dev/null -s UUID -o value #{disk}") do |f|
    uuid = f.read.strip
  end
  uuid
end
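
# Illustrative use of the helper above (the UUID value is made up):
#   get_uuid("/dev/sdb1")  # => "f2d38cc5-91b0-4d68-b19b-0f4c62d8bdc8"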

to_use_disks.sort.each do |k|
  # By default, we will use (and, if needed, format) the first partition.
  target_suffix = k + "1"
  target_dev = "/dev/#{k}"
  target_dev_part = "/dev/#{target_suffix}"
  # Protect against OSes that confuse Ohai: if the device isn't there,
  # don't try to use it.
  unless ::File.exist?(target_dev)
    Chef::Log.warn("HADOOP: device #{target_dev} does not seem to exist, ignoring.")
    next
  end
  disk = {}
  disk[:name] = target_dev_part

  # Make sure that the kernel is aware of the current state of the
  # drive partition tables.
  ::Kernel.system("partprobe #{target_dev}")
  # Let udev catch up, if needed
  sleep 3

  # Create the first partition on the disk if it does not already exist.
  # This takes barely any time, so don't bother parallelizing it.
  # Create the first partition starting at 1MB into the disk, and use GPT.
  # This ensures that it is optimally aligned from an RMW cycle minimization
  # standpoint for just about everything -- RAID stripes, SSD erase blocks,
  # 4k sector drives, you name it, and we can have >2TB volumes.
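  # Worked out: 2048 sectors x 512 bytes = 1MiB, so "mkpart primary ext2
  # 2048s -1M" starts the partition 1MiB into the disk and ends it about
  # 1MB short of the end of the disk.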
  unless ::Kernel.system("grep -q \'#{target_suffix}$\' /proc/partitions")
    Chef::Log.info("HADOOP: Creating hadoop partition on #{target_dev}")
    ::Kernel.system("parted -s #{target_dev} -- unit s mklabel gpt mkpart primary ext2 2048s -1M")
    ::Kernel.system("partprobe #{target_dev}")
    sleep 3
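    # The dd below zeroes the first 65KiB (bs=1024, count=65) of the new
    # partition, wiping any stale filesystem signature so the blkid probe
    # further down won't mistake leftover data for a valid filesystem.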
    ::Kernel.system("dd if=/dev/zero of=#{target_dev_part} bs=1024 count=65")
  end

  # Check to see if there is a volume on the first partition of the
  # drive.  If not, fork and exec our formatter.  We will wait later.
  if ::Kernel.system("blkid -c /dev/null #{target_dev_part} &>/dev/null")
    # This filesystem already exists.  Save its UUID for later.
    disk[:uuid]=get_uuid target_dev_part
  else
    Chef::Log.info("HADOOP: formatting #{target_dev_part}") if debug
    ::Kernel.exec "mkfs.ext3 #{target_dev_part}" unless ::Process.fork
    disk[:fresh] = true
    wait_for_format = true
  end

  found_disks << disk.dup
end

# Wait for formatting to finish
if wait_for_format
  Chef::Log.info("HADOOP: Waiting on all drives to finish formatting") if debug
  ::Process.waitall
end

# Setup the mount points, if needed
found_disks.each do |disk|
  if disk[:fresh]
    # We just created this filesystem.
    # Grab its UUID and create a mount point.
    disk[:uuid] = get_uuid(disk[:name])
    Chef::Log.info("HADOOP: Adding #{disk[:name]} (#{disk[:uuid]}) to the Hadoop configuration.")
    disk[:mount_point] = "#{dfs_base_dir}/hdfs01/#{disk[:uuid]}"
    ::Kernel.system("mkdir -p #{disk[:mount_point]}")
  elsif disk[:uuid]
    # This filesystem already existed.
    # If we did not create a mountpoint for it, print a warning and skip it.
    disk[:mount_point] = "#{dfs_base_dir}/hdfs01/#{disk[:uuid]}"
    unless ::File.directory?(disk[:mount_point])
      Chef::Log.warn("HADOOP: #{disk[:name]} (#{disk[:uuid]}) was not created by configure-disks, ignoring.")
      Chef::Log.warn("HADOOP: If you want to use this disk, please erase any data on it and zero the partition information.")
      next
    end
  end

  node[:hadoop][:devices] << disk
  node[:hadoop][:hdfs][:dfs_data_dir] << ::File.join(disk[:mount_point], "data")
  node[:hadoop][:mapred][:mapred_local_dir] << ::File.join(disk[:mount_point], "mapred")
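  # Mounting by UUID keeps the entry stable if kernel device names change
  # across reboots; the :enable action also persists the mount in /etc/fstab.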
  mount disk[:mount_point] do
    device disk[:uuid]
    device_type :uuid
    options "noatime,nodiratime"
    dump 0
    pass 0 # no FSCK testing.
    fstype "ext3"
    action [:mount, :enable]
  end
end

# Prevent unneeded churn in the recipes by making sure things are sorted.
node[:hadoop][:hdfs][:dfs_data_dir].sort!
node[:hadoop][:mapred][:mapred_local_dir].sort!
node.save

#######################################################################
# End of recipe transactions
#######################################################################
Chef::Log.info("HADOOP : END hadoop:configure-disks") if debug