chef/cookbooks/hadoop/templates/default/hdfs-site.xml.erb
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Cookbook: hadoop
Filename: hdfs-site.xml
Copyright (c) 2011 Dell Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Generated by Chef for <%= node[:fqdn] %> -->
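<!--
Every value below is rendered from node[:hadoop][:hdfs]. A minimal sketch of
how these attributes might be declared in the cookbook's attributes file; the
attribute names match this template, but the defaults shown are illustrative
assumptions, not the cookbook's actual values:

  # attributes/default.rb (sketch; values are assumptions)
  default[:hadoop][:hdfs][:dfs_replication] = 3
  default[:hadoop][:hdfs][:dfs_block_size]  = 67108864          # 64 MB
  default[:hadoop][:hdfs][:dfs_data_dir]    = ["/mnt/hdfs/data"]
-->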
<configuration>
<property>
<name>dfs.access.time.precision</name>
<value><%= node[:hadoop][:hdfs][:dfs_access_time_precision] %></value>
<description>The access time for an HDFS file is precise up to this value. The
default value is 1 hour. Setting a value of 0 disables access
times for HDFS.
</description>
</property>
<property>
<name>dfs.balance.bandwidthPerSec</name>
<value><%= node[:hadoop][:hdfs][:dfs_balance_bandwidthPerSec] %></value>
<description>Specifies the maximum amount of bandwidth that each datanode can
utilize for balancing, in terms of the number of bytes per
second.
</description>
</property>
<property>
<name>dfs.block.access.key.update.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_block_access_key_update_interval] %></value>
<description>Interval in minutes at which the namenode updates its access keys.
</description>
</property>
<property>
<name>dfs.block.access.token.enable</name>
<value><%= node[:hadoop][:hdfs][:dfs_block_access_token_enable] %></value>
<description>If "true", access tokens are used as capabilities for accessing
datanodes. If "false", no access tokens are checked on
accessing datanodes.
</description>
</property>
<property>
<name>dfs.block.access.token.lifetime</name>
<value><%= node[:hadoop][:hdfs][:dfs_block_access_token_lifetime] %></value>
<description>The lifetime of access tokens in minutes.
</description>
</property>
<property>
<name>dfs.block.size</name>
<value><%= node[:hadoop][:hdfs][:dfs_block_size] %></value>
<description>The default block size for new files.
</description>
</property>
<property>
<name>dfs.blockreport.initialDelay</name>
<value><%= node[:hadoop][:hdfs][:dfs_blockreport_initialDelay] %></value>
<description>Delay for first block report in seconds.
</description>
</property>
<property>
<name>dfs.blockreport.intervalMsec</name>
<value><%= node[:hadoop][:hdfs][:dfs_blockreport_intervalMsec] %></value>
<description>Determines block reporting interval in milliseconds.
</description>
</property>
<property>
<name>dfs.client.block.write.retries</name>
<value><%= node[:hadoop][:hdfs][:dfs_client_block_write_retries] %></value>
<description>The number of retries for writing blocks to the data nodes, before we
signal failure to the application.
</description>
</property>
<property>
<name>dfs.data.dir</name>
<value><%= node[:hadoop][:hdfs][:dfs_data_dir].join(",") %></value>
<description>Determines where on the local filesystem a DFS data node should
store its blocks. If this is a comma-delimited list of
directories, then data will be stored in all named directories,
typically on different devices. Directories that do not exist
are ignored.
</description>
</property>
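<!--
Note that dfs_data_dir (like dfs_name_dir and dfs_name_edits_dir further
down) is joined with commas by this template, so the attribute must be a
Ruby array. A hedged example of overriding it in a role; the role name and
paths are hypothetical:

  # roles/hadoop-datanode.rb (sketch)
  override_attributes(
    :hadoop => {
      :hdfs => { :dfs_data_dir => ["/data/1/dfs", "/data/2/dfs"] }
    }
  )
-->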
<property>
<name>dfs.datanode.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_address] %></value>
<description>The address the datanode server will listen on. If the port is
0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_data_dir_perm] %></value>
<description>Permissions for the directories on the local filesystem where the
DFS data node stores its blocks. The permissions can be either
octal or symbolic.
</description>
</property>
<property>
<name>dfs.datanode.dns.interface</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_dns_interface] %></value>
<description>The name of the Network Interface from which a data node should
report its IP address.
</description>
</property>
<property>
<name>dfs.datanode.dns.nameserver</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_dns_nameserver] %></value>
<description>The host name or IP address of the name server (DNS) which a DataNode
should use to determine the host name used by the NameNode for
communication and display purposes.
</description>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_du_reserved] %></value>
<description>Reserved space in bytes per volume. Always leave this much space free
for non dfs use.
</description>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_failed_volumes_tolerated] %></value>
<description>The number of volumes that are allowed to fail before a datanode
stops offering service. By default any volume failure will
cause a datanode to shut down.
</description>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_handler_count] %></value>
<description>The number of server threads for the datanode.
</description>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_http_address] %></value>
<description>The datanode http server address and port. If the port is 0 then the
server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.https.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_https_address] %></value>
<description>The datanode https server address and port.
</description>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_datanode_ipc_address] %></value>
<description>The datanode ipc server address and port. If the port is 0 then the
server will start on a free port.
</description>
</property>
<property>
<name>dfs.default.chunk.view.size</name>
<value><%= node[:hadoop][:hdfs][:dfs_default_chunk_view_size] %></value>
<description>The number of bytes of a file to view in the browser.
</description>
</property>
<property>
<name>dfs.df.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_df_interval] %></value>
<description>Disk usage statistics refresh interval in msec.
</description>
</property>
<property>
<name>dfs.heartbeat.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_heartbeat_interval] %></value>
<description>Determines datanode heartbeat interval in seconds.
</description>
</property>
<property>
<name>dfs.hosts</name>
<value><%= node[:hadoop][:hdfs][:dfs_hosts] %></value>
<description>Names a file that contains a list of hosts that are permitted to
connect to the namenode. The full pathname of the file must be
specified. If the value is empty, all hosts are permitted.
</description>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value><%= node[:hadoop][:hdfs][:dfs_hosts_exclude] %></value>
<description>Names a file that contains a list of hosts that are not permitted to
connect to the namenode. The full pathname of the file must be
specified. If the value is empty, no hosts are excluded.
</description>
</property>
<property>
<name>dfs.http.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_http_address] %></value>
<description>The address and the base port on which the dfs namenode web ui
will listen. If the port is 0 then the server will start on a
free port.
</description>
</property>
<property>
<name>dfs.https.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_https_address] %></value>
<description>The https address and base port for the dfs namenode web ui.
</description>
</property>
<property>
<name>dfs.https.client.keystore.resource</name>
<value><%= node[:hadoop][:hdfs][:dfs_https_client_keystore_resource] %></value>
<description>Resource file from which ssl client keystore information will be
extracted.
</description>
</property>
<property>
<name>dfs.https.enable</name>
<value><%= node[:hadoop][:hdfs][:dfs_https_enable] %></value>
<description>Decides whether HTTPS (SSL) is supported on HDFS.
</description>
</property>
<property>
<name>dfs.https.need.client.auth</name>
<value><%= node[:hadoop][:hdfs][:dfs_https_need_client_auth] %></value>
<description>Whether SSL client certificate authentication is required.
</description>
</property>
<property>
<name>dfs.https.server.keystore.resource</name>
<value><%= node[:hadoop][:hdfs][:dfs_https_server_keystore_resource] %></value>
<description>Resource file from which ssl server keystore information will be
extracted.
</description>
</property>
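<!--
The dfs.https.* properties above work as a group: dfs.https.enable turns SSL
on, dfs.https.need.client.auth requires client certificates, and the two
keystore resources name files read from the classpath (by Hadoop convention,
ssl-server.xml and ssl-client.xml). A sketch of matching attribute defaults,
assuming SSL stays off; values are illustrative, not the cookbook's own:

  default[:hadoop][:hdfs][:dfs_https_enable] = false
  default[:hadoop][:hdfs][:dfs_https_server_keystore_resource] = "ssl-server.xml"
  default[:hadoop][:hdfs][:dfs_https_client_keystore_resource] = "ssl-client.xml"
-->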
<property>
<name>dfs.max.objects</name>
<value><%= node[:hadoop][:hdfs][:dfs_max_objects] %></value>
<description>The maximum number of files, directories and blocks dfs supports. A
value of zero indicates no limit to the number of objects that
dfs supports.
</description>
</property>
<property>
<name>dfs.name.dir</name>
<value><%= node[:hadoop][:hdfs][:dfs_name_dir].join(",") %></value>
<description>Determines where on the local filesystem the DFS name node should
store the name table (fsimage). If this is a comma-delimited
list of directories then the name table is replicated in all of
the directories, for redundancy.
</description>
</property>
<property>
<name>dfs.name.edits.dir</name>
<value><%= node[:hadoop][:hdfs][:dfs_name_edits_dir].join(",") %></value>
<description>Determines where on the local filesystem the DFS name node should
store the transaction (edits) file. If this is a
comma-delimited list of directories then the transaction file
is replicated in all of the directories, for redundancy. The
default value is the same as dfs.name.dir.
</description>
</property>
<property>
<name>dfs.namenode.decommission.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_decommission_interval] %></value>
<description>The interval in seconds at which the namenode checks whether
decommissioning is complete.
</description>
</property>
<property>
<name>dfs.namenode.decommission.nodes.per.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_decommission_nodes_per_interval] %></value>
<description>The number of nodes the namenode checks for decommission
completion in each dfs.namenode.decommission.interval.
</description>
</property>
<property>
<name>dfs.namenode.delegation.key.update-interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_key_update_interval] %></value>
<description>The update interval, in milliseconds, for the master key for
delegation tokens in the namenode.
</description>
</property>
<property>
<name>dfs.namenode.delegation.token.max-lifetime</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_token_max_lifetime] %></value>
<description>The maximum lifetime in milliseconds for which a delegation token is
valid.
</description>
</property>
<property>
<name>dfs.namenode.delegation.token.renew-interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_delegation_token_renew_interval] %></value>
<description>The renewal interval for delegation tokens in milliseconds.
</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_handler_count] %></value>
<description>The number of server threads for the namenode.
</description>
</property>
<property>
<name>dfs.namenode.logging.level</name>
<value><%= node[:hadoop][:hdfs][:dfs_namenode_logging_level] %></value>
<description>The logging level for the dfs namenode. Other values are
"dir" (trace namespace mutations), "block" (trace block
under/over replications and block creations/deletions), or
"all".
</description>
</property>
<property>
<name>dfs.permissions</name>
<value><%= node[:hadoop][:hdfs][:dfs_permissions] %></value>
<description>If "true", enable permission checking in HDFS. If "false", permission
checking is turned off, but all other behavior is unchanged.
Switching from one parameter value to the other does not change
the mode, owner or group of files or directories.
</description>
</property>
<property>
<name>dfs.permissions.supergroup</name>
<value><%= node[:hadoop][:hdfs][:dfs_permissions_supergroup] %></value>
<description>The name of the group of super-users.
</description>
</property>
<property>
<name>dfs.replication</name>
<value><%= node[:hadoop][:hdfs][:dfs_replication] %></value>
<description>Default block replication. The actual number of replications can be
specified when the file is created. The default is used if
replication is not specified at create time.
</description>
</property>
<property>
<name>dfs.replication.considerLoad</name>
<value><%= node[:hadoop][:hdfs][:dfs_replication_considerLoad] %></value>
<description>Decides whether chooseTarget considers the target's load.
</description>
</property>
<property>
<name>dfs.replication.interval</name>
<value><%= node[:hadoop][:hdfs][:dfs_replication_interval] %></value>
<description>The periodicity in seconds with which the namenode computes
replication work for datanodes.
</description>
</property>
<property>
<name>dfs.replication.max</name>
<value><%= node[:hadoop][:hdfs][:dfs_replication_max] %></value>
<description>Maximum block replication.
</description>
</property>
<property>
<name>dfs.replication.min</name>
<value><%= node[:hadoop][:hdfs][:dfs_replication_min] %></value>
<description>Minimum block replication.
</description>
</property>
<property>
<name>dfs.safemode.extension</name>
<value><%= node[:hadoop][:hdfs][:dfs_safemode_extension] %></value>
<description>Determines extension of safe mode in milliseconds after the threshold
level is reached.
</description>
</property>
<property>
<name>dfs.safemode.threshold.pct</name>
<value><%= node[:hadoop][:hdfs][:dfs_safemode_threshold_pct] %></value>
<description>Specifies the percentage of blocks that should satisfy the minimal
replication requirement defined by dfs.replication.min. Values
less than or equal to 0 mean not to start in safe mode. Values
greater than 1 will make safe mode permanent.
</description>
</property>
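<!--
Worked example of how the two safe-mode properties above interact, using
illustrative values: with dfs.replication.min = 1 and a threshold of 0.999,
the namenode stays in safe mode until 99.9% of all blocks have at least one
reported replica, then waits dfs.safemode.extension milliseconds before
leaving it.
-->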
<property>
<name>dfs.secondary.http.address</name>
<value><%= node[:hadoop][:hdfs][:dfs_secondary_http_address] %></value>
<description>The secondary namenode http server address and port. If the port is 0
then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.support.append</name>
<value><%= node[:hadoop][:hdfs][:dfs_support_append] %></value>
<description>Does HDFS allow appends to files? This is currently set to false
because there are bugs in the append code and appends are not
supported in any production cluster.
</description>
</property>
<property>
<name>dfs.web.ugi</name>
<value><%= node[:hadoop][:hdfs][:dfs_web_ugi] %></value>
<description>The user account used by the web interface. Syntax:
USERNAME,GROUP1,GROUP2, ...
</description>
</property>
</configuration>