crowbar/crowbar-openstack
chef/cookbooks/postgresql/recipes/config_pgtune.rb
#
# Cookbook Name:: postgresql
# Recipe:: config_pgtune
# Author:: David Crane (<davidc@donorschoose.org>)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#######
# Load the binaryround(value) method from libraries/default.rb
::Chef::Recipe.send(:include, Opscode::PostgresqlHelpers)

#######
# This recipe is based on Greg Smith's pgtune script (the Feb 1, 2012
# version at https://github.com/gregs1104/pgtune). Introduction: pgtune
# takes the wimpy default postgresql.conf and expands the database
# server to be as powerful as the hardware it's being deployed on.
#
# The default postgresql.conf aims at a system with approximately 128MB
# of RAM. This recipe recommends a baseline configuration in the right
# general range for a dedicated Postgresql system.
#
# This recipe takes three optional parameters that may be passed in as
# node['postgresql']['config_pgtune'] attributes:
#   * db_type -- Specifies database type as one of: dw, oltp,
#     web, mixed, desktop. If not specified, the default is mixed.
#   * max_connections -- Specifies number of maximum connections
#     expected. If not specified, it depends on database type.
#   * total_memory -- Specifies total system memory. If not specified,
#     it will be detected from the Ohai automatic attributes.
#
# Using those inputs, this recipe will compute and set the following
# node.default['postgresql']['config'] attributes:
#   * max_connections
#   * shared_buffers
#   * effective_cache_size
#   * work_mem
#   * maintenance_work_mem
#   * checkpoint_segments
#   * checkpoint_completion_target
#   * wal_buffers
#   * default_statistics_target
#
# This recipe deviates from the original pgtune script for 2 settings:
# shared_buffers is capped for large memory systems (which Greg
# mentioned in a TODO.rst) and wal_buffers will auto-tune starting with
# 9.1 (which is a feature that Greg built into Postgresql).
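
#######
# Illustrative usage (hypothetical values, not shipped defaults): the three
# optional inputs described above could be set in a role or environment
# before this recipe runs, e.g.
#
#   "postgresql" => {
#     "config_pgtune" => {
#       "db_type" => "web",
#       "max_connections" => "200",
#       "total_memory" => "8388608kB"
#     }
#   }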

#######
# These are the workload characteristics of the five database types
# that can be specified as node['postgresql']['config_pgtune']['db_type']:
#
# dw -- Data Warehouse
#   * Typically I/O- or RAM-bound
#   * Large bulk loads of data
#   * Large complex reporting queries
#   * Also called "Decision Support" or "Business Intelligence"
#
# oltp -- Online Transaction Processing
#   * Typically CPU- or I/O-bound
#   * DB slightly larger than RAM to 1TB
#   * 20-40% small data write queries
#   * Some long transactions and complex read queries
#
# web -- Web Application
#   * Typically CPU-bound
#   * DB much smaller than RAM
#   * 90% or more simple queries
#
# mixed -- Mixed DW and OLTP characteristics
#   * A wide mixture of queries
#
# desktop -- Not a dedicated database
#   * A general workstation, perhaps for a developer

# Parse out db_type option, or use default.
db_type = "mixed"

if (node["postgresql"].attribute?("config_pgtune") && node["postgresql"]["config_pgtune"].attribute?("db_type"))
  db_type = node["postgresql"]["config_pgtune"]["db_type"]
  if (!(["dw","oltp","web","mixed","desktop"].include?(db_type)))
    Chef::Application.fatal!([
        "Bad value (#{db_type})",
        "for node['postgresql']['config_pgtune']['db_type'] attribute.",
        "Valid values are one of dw, oltp, web, mixed, desktop."
      ].join(" "))
  end
end

# Parse out max_connections option, or use a value based on db_type.
con =
{ "web" => 200,
  "oltp" => 300,
  "dw" => 20,
  "mixed" => 80,
  "desktop" => 5
}.fetch(db_type)

if (node["postgresql"].attribute?("config_pgtune") && node["postgresql"]["config_pgtune"].attribute?("max_connections"))
  max_connections = node["postgresql"]["config_pgtune"]["max_connections"]
  if (max_connections.match(/\A[1-9]\d*\Z/) == nil)
    Chef::Application.fatal!([
        "Bad value (#{max_connections})",
        "for node['postgresql']['config_pgtune']['max_connections'] attribute.",
        "Valid values are non-zero integers only."
      ].join(" "))
  end
  con = max_connections.to_i
end
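
# Note: the regex above expects the attribute as a String (e.g. "200", not
# 200). With the illustrative "200" from the usage example above, con
# would become 200.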

# Parse out total_memory option, or use value detected by Ohai.
total_memory = node["memory"]["total"]

# Override the Ohai-detected total_memory with a node attribute if desired.
# For example, on a system *not* dedicated to Postgresql.
if (node["postgresql"].attribute?("config_pgtune") && node["postgresql"]["config_pgtune"].attribute?("total_memory"))
  total_memory = node["postgresql"]["config_pgtune"]["total_memory"]
  if (total_memory.match(/\A[1-9]\d*kB\Z/) == nil)
    Chef::Application.fatal!([
        "Bad value (#{total_memory})",
        "for node['postgresql']['config_pgtune']['total_memory'] attribute.",
        "Valid values are non-zero integers followed by kB (e.g., 49416564kB)."
      ].join(" "))
  end
end

# Ohai reports node[:memory][:total] in kB, as in "921756kB"
mem = total_memory.split("kB")[0].to_i / 1024 # in MB
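
# Worked example (illustrative figures only): a node reporting "8388608kB"
# (8 GB) yields mem = 8388608 / 1024 = 8192 MB. The worked examples in the
# comments below assume such a hypothetical 8 GB node with no config_pgtune
# overrides, i.e. the default "mixed" db_type and con = 80.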

#######
# RAM-related settings computed as in Greg Smith's pgtune script.
# Remember that con and mem were either chosen above based on the
# db_type or the actual total memory, or were passed in attributes.

# (1) max_connections
#     Sets the maximum number of concurrent connections.
node.default["postgresql"]["config"]["max_connections"] = con

# The calculations for the next four settings would not be optimal
# for low memory systems. In that case, the calculation is skipped,
# leaving the built-in Postgresql settings, which are actually
# intended for those low memory systems.
if (mem >= 256)

  # (2) shared_buffers
  #     Sets the number of shared memory buffers used by the server.
  shared_buffers =
  { "web" => mem/4,
    "oltp" => mem/4,
    "dw" => mem/4,
    "mixed" => mem/4,
    "desktop" => mem/16
  }.fetch(db_type)

  # Robert Haas has advised to cap the size of shared_buffers based on
  # the memory architecture: 2GB on 32-bit and 8GB on 64-bit machines.
  # http://rhaas.blogspot.com/2012/03/tuning-sharedbuffers-and-walbuffers.html
  case node["kernel"]["machine"]
  when "i386" # 32-bit machines
    if shared_buffers > 2*1024
      shared_buffers = 2*1024
    end
  when "x86_64" # 64-bit machines
    if shared_buffers > 8*1024
      shared_buffers = 8*1024
    end
  end

  node.default["postgresql"]["config"]["shared_buffers"] = binaryround(shared_buffers*1024*1024)

  # (3) effective_cache_size
  #     Sets the planner's assumption about the size of the disk cache.
  #     That is, the portion of the kernel's disk cache that will be
  #     used for PostgreSQL data files.
  effective_cache_size =
  { "web" => mem * 3 / 4,
    "oltp" => mem * 3 / 4,
    "dw" => mem * 3 / 4,
    "mixed" => mem * 3 / 4,
    "desktop" => mem / 4
  }.fetch(db_type)

  node.default["postgresql"]["config"]["effective_cache_size"] = binaryround(effective_cache_size*1024*1024)

  # (4) work_mem
  #     Sets the maximum memory to be used for query workspaces.
  work_mem =
  { "web" => mem / con,
    "oltp" => mem / con,
    "dw" => mem / con / 2,
    "mixed" => mem / con / 2,
    "desktop" => mem / con / 6
  }.fetch(db_type)

  node.default["postgresql"]["config"]["work_mem"] = binaryround(work_mem*1024*1024)

  # (5) maintenance_work_mem
  #     Sets the maximum memory to be used for maintenance operations.
  #     This includes operations such as VACUUM and CREATE INDEX.
  maintenance_work_mem =
  { "web" => mem / 16,
    "oltp" => mem / 16,
    "dw" => mem / 8,
    "mixed" => mem / 16,
    "desktop" => mem / 16
  }.fetch(db_type)

  # Cap maintenance RAM at 1GB on servers with lots of memory
  if (maintenance_work_mem > 1*1024)
    maintenance_work_mem = 1*1024
  end

  node.default["postgresql"]["config"]["maintenance_work_mem"] = binaryround(maintenance_work_mem*1024*1024)

end

#######
# Checkpoint-related parameters that affect transaction rate and
# maximum tolerable recovery playback time.

# (6) checkpoint_segments
#     Sets the maximum distance in log segments between automatic WAL checkpoints.
checkpoint_segments =
{ "web" => 8,
  "oltp" => 16,
  "dw" => 64,
  "mixed" => 16,
  "desktop" => 3
}.fetch(db_type)

node.default["postgresql"]["config"]["checkpoint_segments"] = checkpoint_segments

# (7) checkpoint_completion_target
#     Time spent flushing dirty buffers during checkpoint, as fraction
#     of checkpoint interval.
checkpoint_completion_target =
{ "web" => "0.7",
  "oltp" => "0.9",
  "dw" => "0.9",
  "mixed" => "0.9",
  "desktop" => "0.5"
}.fetch(db_type)

node.default["postgresql"]["config"]["checkpoint_completion_target"] = checkpoint_completion_target

# (8) wal_buffers
#     Sets the number of disk-page buffers in shared memory for WAL.
# Starting with 9.1, wal_buffers will auto-tune if set to the -1 default.
# For 8.X and 9.0, it needed to be specified, which pgtune did as follows.
if node["postgresql"]["version"].to_f < 9.1
  wal_buffers = 512 * checkpoint_segments
  # pgtune appears to use 1kB units for wal_buffers
  node.default["postgresql"]["config"]["wal_buffers"] = binaryround(wal_buffers*1024)
else
  node.default["postgresql"]["config"]["wal_buffers"] = "-1"
end
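
# Worked example: on a hypothetical 9.0 node with the "mixed" default of 16
# checkpoint_segments, wal_buffers = 512 * 16 = 8192, so the attribute is set
# to binaryround(8192*1024) bytes, i.e. about 8MB. On 9.1 and later, the -1
# setting lets the server size wal_buffers automatically.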

# (9) default_statistics_target
#     Sets the default statistics target. This applies to table columns
#     that have not had a column-specific target set via
#     ALTER TABLE SET STATISTICS.
default_statistics_target =
{ "web" => 100,
  "oltp" => 100,
  "dw" => 500,
  "mixed" => 100,
  "desktop" => 100
}.fetch(db_type)

node.default["postgresql"]["config"]["default_statistics_target"] = default_statistics_target