# ipfs-search/ipfs-search
#
# View on GitHub
# docs/example_config.yml
#
# Summary
#
# Maintainability
# Test Coverage
# IPFS settings.
ipfs:
  # IPFS API endpoint; also IPFS_API_URL in env.
  api_url: http://localhost:5001
  # IPFS gateway; also IPFS_GATEWAY_URL in env.
  gateway_url: http://localhost:8080
  # Size of items considered to be partial (when unreferenced).
  partial_size: 256KB
# OpenSearch endpoint plus thresholds for bulk writes and bulk gets.
opensearch:
  # OpenSearch URL; also OPENSEARCH_URL in env.
  url: http://localhost:9200
  # Workers to use for bulk writes.
  bulk_indexer_workers: 16
  # Byte-size threshold for bulk writes.
  bulk_flush_bytes: 5MB
  # Time threshold for bulk writes.
  bulk_flush_timeout: 5m
  # Item threshold for execution of bulk gets.
  bulk_getter_batch_size: 48
  # Time threshold for bulk gets.
  bulk_getter_batch_timeout: 150ms
# Redis settings.
redis:
  # Address(es) of the Redis server(s); one list entry per server.
  addresses:
    - localhost:6379
# AMQP connection settings.
amqp:
  # AMQP URL; also AMQP_URL in env.
  url: amqp://guest:guest@localhost:5672/
  # Maximum number of reconnect attempts.
  max_reconnect: 100
  # Time to wait between reconnects.
  reconnect_time: 2s
# tika-extractor settings.
tika:
  # tika-extractor endpoint URL; also TIKA_EXTRACTOR in environment.
  url: http://localhost:8081
  # Timeout for requests to tika-extractor.
  timeout: 5m
  # Don't attempt to extract metadata for resources larger than this.
  max_file_size: 4GB
# nsfw-server settings.
nsfw:
  # URL of nsfw-server.
  url: http://localhost:3000
  # Timeout for metadata requests to the server.
  timeout: 5m
  # Don't attempt to get metadata for files over this size.
  max_file_size: 1GB
# Tracing settings.
instrumentation:
  # Ratio of requests to sample for tracing.
  # OTEL_TRACE_SAMPLER_ARG in env.
  sampling_ratio: 0.01
  # HTTP jaeger.thrift endpoint for tracing.
  # OTEL_EXPORTER_JAEGER_ENDPOINT in env.
  jaeger_endpoint: http://localhost:14268/api/traces
# Crawler buffers, timeouts and limits.
crawler:
  # Buffer this many directory entries between listing and queueing.
  direntry_buffer_size: 8192
  # Minimum time between updating `last-seen` on objects.
  min_update_age: 1h
  # Request timeout for Stat() calls.
  stat_timeout: 1m
  # Request timeout for Ls() calls.
  direntry_timeout: 1m
  # Don't index directories larger than this (contained items will be
  # queued nonetheless).
  max_dirsize: 32768
# Sniffer settings.
sniffer:
  # Expire items in the lastseen/dedup buffer after this time.
  lastseen_expiration: 1h
  # Expire the lastseen buffer when its size exceeds this.
  lastseen_prunelen: 32768
  # Throw a timeout error when no log messages arrive.
  logger_timeout: 1m
  # Size of the channels buffering between yielder, filter and adder.
  buffer_size: 512
# Index definitions. Every entry below carries the same two keys:
#   name   - Name of index to use, internally as well as in OpenSearch.
#   prefix - Key prefix for Redis cache.
indexes:
  files:
    name: ipfs_files
    prefix: f
  directories:
    name: ipfs_directories
    prefix: d
  invalids:
    name: ipfs_invalids
    prefix: i
  partials:
    name: ipfs_partials
    prefix: p
# Queue definitions. Every entry below carries a single key:
#   name - Name of RabbitMQ queue to use.
queues:
  files:
    name: files
  directories:
    name: directories
  hashes:
    name: hashes
# Amount of workers for various resources, plus connection limits.
workers:
  # Also HASH_WORKERS in env.
  hash_workers: 70
  # Also FILE_WORKERS in env.
  file_workers: 120
  # Also DIRECTORY in env.
  # NOTE(review): the sibling keys use a *_WORKERS variable name, so this may
  # be meant to read DIRECTORY_WORKERS - confirm against the config loader.
  directory_workers: 70
  # Maximum simultaneous connections to IPFS.
  ipfs_max_connections: 1000
  # Maximum simultaneous connections to extractors.
  extractor_max_connections: 100