# docs/example_config.yml
# IPFS endpoints and the size cut-off for partial items.
ipfs:
  api_url: http://localhost:5001  # IPFS API endpoint, also IPFS_API_URL in env.
  gateway_url: http://localhost:8080  # IPFS gateway, also IPFS_GATEWAY_URL in env.
  partial_size: 256KB  # Size of items considered to be partial (when unreferenced).
# OpenSearch endpoint and batching limits for bulk writes and bulk gets.
opensearch:
  url: http://localhost:9200  # Also OPENSEARCH_URL in env.
  bulk_indexer_workers: 16  # Workers to use for bulk writes.
  bulk_flush_bytes: 5MB  # Byte size threshold for bulk writes.
  bulk_flush_timeout: 5m  # Time threshold for bulk writes.
  bulk_getter_batch_size: 48  # Item threshold for execution of bulk gets.
  bulk_getter_batch_timeout: 150ms  # Time threshold for bulk gets.
# Redis server list.
redis:
  addresses:  # Address(es) of Redis server(s).
    - localhost:6379
# AMQP endpoint and reconnect behaviour.
amqp:
  url: amqp://guest:guest@localhost:5672/  # Also AMQP_URL in env.
  max_reconnect: 100  # Maximum number of reconnect attempts.
  reconnect_time: 2s  # Time to wait between reconnects.
# tika-extractor endpoint and request limits.
tika:
  url: http://localhost:8081  # tika-extractor endpoint URL, also TIKA_EXTRACTOR in environment.
  timeout: 5m  # Timeout for requests to tika-extractor.
  max_file_size: 4GB  # Don't attempt to extract metadata for resources larger than this.
# nsfw-server endpoint and request limits.
nsfw:
  url: http://localhost:3000  # URL of nsfw-server.
  timeout: 5m  # Timeout for metadata requests to the server.
  max_file_size: 1GB  # Don't attempt to get metadata for files over this size.
# Tracing settings.
instrumentation:
  # Ratio of requests to sample for tracing. OTEL_TRACE_SAMPLER_ARG in env.
  sampling_ratio: 0.01
  # HTTP jaeger.thrift endpoint for tracing.
  # OTEL_EXPORTER_JAEGER_ENDPOINT in env.
  jaeger_endpoint: http://localhost:14268/api/traces
# Crawler buffering, timeouts and directory size limit.
crawler:
  direntry_buffer_size: 8192  # Buffer this many directory entries between listing and queueing.
  min_update_age: 1h  # Minimum time between updating `last-seen` on objects.
  stat_timeout: 1m  # Request timeout for Stat() calls.
  direntry_timeout: 1m  # Request timeout for Ls() calls.
  # Don't index directories larger than this (contained items will be queued
  # nonetheless).
  max_dirsize: 32768
# Sniffer lastseen/dedup buffer, logger timeout and channel buffering.
sniffer:
  lastseen_expiration: 1h  # Expire items in lastseen/dedup buffer after this time.
  lastseen_prunelen: 32768  # Expire lastseen buffer when size exceeds this.
  logger_timeout: 1m  # Throw timeout error when no log messages arrive.
  buffer_size: 512  # Size of the channels buffering between yielder, filter and adder.
# Index name and Redis cache key prefix for each item type.
indexes:
  files:
    name: ipfs_files  # Name of index to use, internally as well as in OpenSearch.
    prefix: f  # Key prefix for Redis cache.
  directories:
    name: ipfs_directories
    prefix: d
  invalids:
    name: ipfs_invalids
    prefix: i
  partials:
    name: ipfs_partials
    prefix: p
# RabbitMQ queue name for each item type.
queues:
  files:
    name: files  # Name of RabbitMQ queue to use.
  directories:
    name: directories
  hashes:
    name: hashes
# Worker counts and connection limits.
workers:
  hash_workers: 70  # Number of workers for various resources. Also HASH_WORKERS in env.
  file_workers: 120  # Also FILE_WORKERS in env.
  # NOTE(review): the env name was documented as DIRECTORY; presumably
  # DIRECTORY_WORKERS, matching HASH_WORKERS/FILE_WORKERS - TODO confirm.
  directory_workers: 70
  ipfs_max_connections: 1000  # Maximum simultaneous connections to IPFS.
  extractor_max_connections: 100  # Maximum simultaneous connections to extractors.