crowbar/crowbar-hadoop

chef/cookbooks/hadoop/templates/default/hdfs-site.xml.erb

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
  Cookbook: hadoop
  Filename: hdfs-site.xml
 
  Copyright (c) 2011 Dell Inc.
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
 
      http://www.apache.org/licenses/LICENSE-2.0
 
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!-- Generated by Chef for <%= node[:fqdn] %> -->

<configuration>
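
  <!--
    Each <value> below is rendered from a Chef node attribute under
    node[:hadoop][:hdfs]. As a hedged sketch (the attribute names are the
    ones this template reads; the values shown are hypothetical, not the
    cookbook's real defaults), an attributes file could set:

      default[:hadoop][:hdfs][:dfs_replication] = 3
      default[:hadoop][:hdfs][:dfs_data_dir]    = ["/mnt/hdfs01/data", "/mnt/hdfs02/data"]

    Array-valued attributes such as dfs_data_dir are joined with commas at
    render time via the .join(",") calls below.
  -->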

  <property>
    <name>dfs.access.time.precision</name>
    <value><%= node[:hadoop][:hdfs][:dfs_access_time_precision] %></value>
    <description>The access time for an HDFS file is precise up to this value. The
      default value is 1 hour. Setting a value of 0 disables access
      times for HDFS.
    </description>
  </property>
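
  <!--
    Example (stock Hadoop default, stated for illustration): 3600000, i.e.
    one hour expressed in milliseconds. Setting 0 disables access-time
    tracking entirely, which removes the corresponding namenode edit-log
    traffic.
  -->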

  <property>
    <name>dfs.balance.bandwidthPerSec</name>
    <value><%= node[:hadoop][:hdfs][:dfs_balance_bandwidthPerSec] %></value>
    <description>Specifies the maximum amount of bandwidth that each datanode can
      utilize for balancing purposes, in terms of the number of
      bytes per second.
    </description>
  </property>
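
  <!--
    Example: 1048576 caps balancer traffic at 1 MB/s per datanode (the
    stock Hadoop default); larger values speed up rebalancing at the cost
    of foreground I/O.
  -->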

  <property>
    <name>dfs.block.access.key.update.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_block_access_key_update_interval] %></value>
    <description>Interval in minutes at which namenode updates its access keys.
    </description>
  </property>

  <property>
    <name>dfs.block.access.token.enable</name>
    <value><%= node[:hadoop][:hdfs][:dfs_block_access_token_enable] %></value>
    <description>If "true", access tokens are used as capabilities for accessing
      datanodes. If "false", no access tokens are checked on
      accessing datanodes.
    </description>
  </property>

  <property>
    <name>dfs.block.access.token.lifetime</name>
    <value><%= node[:hadoop][:hdfs][:dfs_block_access_token_lifetime] %></value>
    <description>The lifetime of access tokens in minutes.
    </description>
  </property>

  <property>
    <name>dfs.block.size</name>
    <value><%= node[:hadoop][:hdfs][:dfs_block_size] %></value>
    <description>The default block size for new files.
    </description>
  </property>
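
  <!--
    Example: 67108864 gives the stock Hadoop 64 MB block size; 134217728
    (128 MB) is a common choice for large, sequentially-read files.
  -->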

  <property>
    <name>dfs.blockreport.initialDelay</name>
    <value><%= node[:hadoop][:hdfs][:dfs_blockreport_initialDelay] %></value>
    <description>Delay for first block report in seconds.
    </description>
  </property>

  <property>
    <name>dfs.blockreport.intervalMsec</name>
    <value><%= node[:hadoop][:hdfs][:dfs_blockreport_intervalMsec] %></value>
    <description>Determines block reporting interval in milliseconds.
    </description>
  </property>

  <property>
    <name>dfs.client.block.write.retries</name>
    <value><%= node[:hadoop][:hdfs][:dfs_client_block_write_retries] %></value>
    <description>The number of retries for writing blocks to the data nodes, before we
      signal failure to the application.
    </description>
  </property>

  <property>
    <name>dfs.data.dir</name>
    <value><%= node[:hadoop][:hdfs][:dfs_data_dir].join(",") %></value>
    <description>Determines where on the local filesystem a DFS data node should
      store its blocks. If this is a comma-delimited list of
      directories, then data will be stored in all named directories,
      typically on different devices. Directories that do not exist
      are ignored.
    </description>
  </property>
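
  <!--
    Rendering sketch (hypothetical attribute value): with
    node[:hadoop][:hdfs][:dfs_data_dir] = ["/mnt/hdfs01/data", "/mnt/hdfs02/data"],
    the value above renders as:

      <value>/mnt/hdfs01/data,/mnt/hdfs02/data</value>
  -->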

  <property>
    <name>dfs.datanode.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_address] %></value>
    <description>The address the datanode server will listen on. If the port is
      0 then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_data_dir_perm] %></value>
    <description>Permissions for the directories on the local filesystem where the
      DFS data node stores its blocks. The permissions can either be
      octal or symbolic.
    </description>
  </property>

  <property>
    <name>dfs.datanode.dns.interface</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_dns_interface] %></value>
    <description>The name of the Network Interface from which a data node should
      report its IP address.
    </description>
  </property>

  <property>
    <name>dfs.datanode.dns.nameserver</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_dns_nameserver] %></value>
    <description>The host name or IP address of the name server (DNS) which a DataNode
      should use to determine the host name used by the NameNode for
      communication and display purposes.
    </description>
  </property>

  <property>
    <name>dfs.datanode.du.reserved</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_du_reserved] %></value>
    <description>Reserved space in bytes per volume. Always leave this much space free
      for non-DFS use.
    </description>
  </property>

  <property>
    <name>dfs.datanode.failed.volumes.tolerated</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_failed_volumes_tolerated] %></value>
    <description>The number of volumes that are allowed to fail before a datanode
      stops offering service. By default any volume failure will
      cause a datanode to shut down.
    </description>
  </property>

  <property>
    <name>dfs.datanode.handler.count</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_handler_count] %></value>
    <description>The number of server threads for the datanode.
    </description>
  </property>

  <property>
    <name>dfs.datanode.http.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_http_address] %></value>
    <description>The datanode http server address and port. If the port is 0 then the
      server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.datanode.https.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_https_address] %></value>
    <description>The datanode https server address and port.
    </description>
  </property>

  <property>
    <name>dfs.datanode.ipc.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_datanode_ipc_address] %></value>
    <description>The datanode ipc server address and port. If the port is 0 then the
      server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.default.chunk.view.size</name>
    <value><%= node[:hadoop][:hdfs][:dfs_default_chunk_view_size] %></value>
    <description>The number of bytes of a file to view in the browser.
    </description>
  </property>

  <property>
    <name>dfs.df.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_df_interval] %></value>
    <description>Disk usage statistics refresh interval in msec.
    </description>
  </property>

  <property>
    <name>dfs.heartbeat.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_heartbeat_interval] %></value>
    <description>Determines datanode heartbeat interval in seconds.
    </description>
  </property>

  <property>
    <name>dfs.hosts</name>
    <value><%= node[:hadoop][:hdfs][:dfs_hosts] %></value>
    <description>Names a file that contains a list of hosts that are permitted to
      connect to the namenode. The full pathname of the file must be
      specified. If the value is empty, all hosts are permitted.
    </description>
  </property>
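
  <!--
    Usage sketch (path is illustrative): with dfs.hosts set to
    /etc/hadoop/dfs.include, that file lists one permitted hostname per
    line, and "hadoop dfsadmin -refreshNodes" reloads it without a
    namenode restart.
  -->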

  <property>
    <name>dfs.hosts.exclude</name>
    <value><%= node[:hadoop][:hdfs][:dfs_hosts_exclude] %></value>
    <description>Names a file that contains a list of hosts that are not permitted to
      connect to the namenode. The full pathname of the file must be
      specified. If the value is empty, no hosts are excluded.
    </description>
  </property>

  <property>
    <name>dfs.http.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_http_address] %></value>
    <description>The address and the base port on which the dfs namenode web UI
      will listen. If the port is 0 then the server will start on a
      free port.
    </description>
  </property>
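
  <!--
    Example: 0.0.0.0:50070 binds the namenode web UI on all interfaces at
    the conventional Hadoop 1.x port.
  -->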

  <property>
    <name>dfs.https.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_https_address] %></value>
    <description>The namenode https server address and port.
    </description>
  </property>

  <property>
    <name>dfs.https.client.keystore.resource</name>
    <value><%= node[:hadoop][:hdfs][:dfs_https_client_keystore_resource] %></value>
    <description>Resource file from which ssl client keystore information will be
      extracted.
    </description>
  </property>

  <property>
    <name>dfs.https.enable</name>
    <value><%= node[:hadoop][:hdfs][:dfs_https_enable] %></value>
    <description>Decides whether HTTPS (SSL) is supported on HDFS.
    </description>
  </property>

  <property>
    <name>dfs.https.need.client.auth</name>
    <value><%= node[:hadoop][:hdfs][:dfs_https_need_client_auth] %></value>
    <description>Whether SSL client certificate authentication is required.
    </description>
  </property>

  <property>
    <name>dfs.https.server.keystore.resource</name>
    <value><%= node[:hadoop][:hdfs][:dfs_https_server_keystore_resource] %></value>
    <description>Resource file from which ssl server keystore information will be
      extracted.
    </description>
  </property>

  <property>
    <name>dfs.max.objects</name>
    <value><%= node[:hadoop][:hdfs][:dfs_max_objects] %></value>
    <description>The maximum number of files, directories and blocks dfs supports. A
      value of zero indicates no limit to the number of objects that
      dfs supports.
    </description>
  </property>

  <property>
    <name>dfs.name.dir</name>
    <value><%= node[:hadoop][:hdfs][:dfs_name_dir].join(",") %></value>
    <description>Determines where on the local filesystem the DFS name node should
      store the name table (fsimage). If this is a comma-delimited
      list of directories then the name table is replicated in all of
      the directories, for redundancy.
    </description>
  </property>
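
  <!--
    Redundancy sketch (hypothetical paths): setting dfs_name_dir to
    ["/var/hadoop/name", "/mnt/nfs/hadoop/name"] keeps one copy of the
    fsimage on local disk and one on NFS, a common guard against losing
    the namenode's disk.
  -->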

  <property>
    <name>dfs.name.edits.dir</name>
    <value><%= node[:hadoop][:hdfs][:dfs_name_edits_dir].join(",") %></value>
    <description>Determines where on the local filesystem the DFS name node should
      store the transaction (edits) file. If this is a
      comma-delimited list of directories then the transaction file
      is replicated in all of the directories, for redundancy.
      The default value is the same as dfs.name.dir.
    </description>
  </property>

  <property>
    <name>dfs.namenode.decommission.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_decommission_interval] %></value>
    <description>The interval in seconds at which the namenode checks whether
      decommissioning is complete.
    </description>
  </property>

  <property>
    <name>dfs.namenode.decommission.nodes.per.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_decommission_nodes_per_interval] %></value>
    <description>The number of nodes the namenode checks for decommission
      completeness in each dfs.namenode.decommission.interval.
    </description>
  </property>

  <property>
    <name>dfs.namenode.delegation.key.update-interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_key_update_interval] %></value>
    <description>The update interval, in milliseconds, for the master key for
      delegation tokens in the namenode.
    </description>
  </property>

  <property>
    <name>dfs.namenode.delegation.token.max-lifetime</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_token_max_lifetime] %></value>
    <description>The maximum lifetime in milliseconds for which a delegation token is
      valid.
    </description>
  </property>

  <property>
    <name>dfs.namenode.delegation.token.renew-interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_token_renew_interval] %></value>
    <description>The renewal interval for delegation token in milliseconds.
    </description>
  </property>

  <property>
    <name>dfs.namenode.handler.count</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_handler_count] %></value>
    <description>The number of server threads for the namenode.
    </description>
  </property>

  <property>
    <name>dfs.namenode.logging.level</name>
    <value><%= node[:hadoop][:hdfs][:dfs_namenode_logging_level] %></value>
    <description>The logging level for the dfs namenode. Other values are "dir"
      (trace namespace mutations), "block" (trace block under/over
      replications and block creations/deletions), or "all".
    </description>
  </property>

  <property>
    <name>dfs.permissions</name>
    <value><%= node[:hadoop][:hdfs][:dfs_permissions] %></value>
    <description>If "true", enable permission checking in HDFS. If "false", permission
      checking is turned off, but all other behavior is unchanged.
      Switching from one parameter value to the other does not change
      the mode, owner or group of files or directories.
    </description>
  </property>

  <property>
    <name>dfs.permissions.supergroup</name>
    <value><%= node[:hadoop][:hdfs][:dfs_permissions_supergroup] %></value>
    <description>The name of the group of super-users.
    </description>
  </property>

  <property>
    <name>dfs.replication</name>
    <value><%= node[:hadoop][:hdfs][:dfs_replication] %></value>
    <description>Default block replication. The actual number of replications can be
      specified when the file is created. The default is used if
      replication is not specified at create time.
    </description>
  </property>
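
  <!--
    The replication factor can also be changed per file after creation,
    e.g. with the HDFS shell (path is illustrative):

      hadoop fs -setrep -w 3 /user/data/file.txt

    where -w waits until the target replication is reached.
  -->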

  <property>
    <name>dfs.replication.considerLoad</name>
    <value><%= node[:hadoop][:hdfs][:dfs_replication_considerLoad] %></value>
    <description>Decides whether chooseTarget considers the target's load.
    </description>
  </property>

  <property>
    <name>dfs.replication.interval</name>
    <value><%= node[:hadoop][:hdfs][:dfs_replication_interval] %></value>
    <description>The periodicity in seconds with which the namenode computes
      replication work for datanodes.
    </description>
  </property>

  <property>
    <name>dfs.replication.max</name>
    <value><%= node[:hadoop][:hdfs][:dfs_replication_max] %></value>
    <description>Maximum block replication.
    </description>
  </property>

  <property>
    <name>dfs.replication.min</name>
    <value><%= node[:hadoop][:hdfs][:dfs_replication_min] %></value>
    <description>Minimum block replication.
    </description>
  </property>

  <property>
    <name>dfs.safemode.extension</name>
    <value><%= node[:hadoop][:hdfs][:dfs_safemode_extension] %></value>
    <description>Determines extension of safe mode in milliseconds after the threshold
      level is reached.
    </description>
  </property>

  <property>
    <name>dfs.safemode.threshold.pct</name>
    <value><%= node[:hadoop][:hdfs][:dfs_safemode_threshold_pct] %></value>
    <description>Specifies the percentage of blocks that should satisfy the minimal
      replication requirement defined by dfs.replication.min. Values
      less than or equal to 0 mean not to start in safe mode. Values
      greater than 1 will make safe mode permanent.
    </description>
  </property>
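
  <!--
    Worked example: with the conventional value 0.999f and
    dfs.replication.min = 1, the namenode stays in safe mode until 99.9%
    of blocks have at least one reported replica, then waits
    dfs.safemode.extension milliseconds before leaving safe mode.
  -->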

  <property>
    <name>dfs.secondary.http.address</name>
    <value><%= node[:hadoop][:hdfs][:dfs_secondary_http_address] %></value>
    <description>The secondary namenode http server address and port. If the port is 0
      then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.support.append</name>
    <value><%= node[:hadoop][:hdfs][:dfs_support_append] %></value>
    <description>Does HDFS allow appends to files? This is currently set to false
      because there are bugs in the "append code" and it is not
      supported in any production cluster.
    </description>
  </property>

  <property>
    <name>dfs.web.ugi</name>
    <value><%= node[:hadoop][:hdfs][:dfs_web_ugi] %></value>
    <description>The user account used by the web interface. Syntax:
      USERNAME,GROUP1,GROUP2, ...
    </description>
  </property>
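
  <!--
    Example value: webuser,webgroup (a single user followed by the groups
    it belongs to).
  -->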

</configuration>