Configuration file

# Include this field if you want to enable an embedded M3 Coordinator instance
coordinator:
  # Address for M3 Coordinator to listen for traffic.
  listenAddress: <url>
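# For illustration, a minimal embedded coordinator block; 0.0.0.0:7201 is the
# conventional coordinator port, not a value mandated here:
#   coordinator:
#     listenAddress: 0.0.0.0:7201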

# Configuration for a DB node (required)
db:
  # Database index configuration
  index:
    # The maximum number of outstanding QueryID requests to service concurrently
    maxQueryIDsConcurrency: <int>
    # Limit on the max number of states used by a regexp deterministic finite automaton
    # Default = 10000
    regexpDFALimit: <int>
    # Limit on the max number of bytes used by the finite state automaton
    # Default = 10MB (10,000,000 bytes)
    regexpFSALimit: <int>
    # Likelihood that an incoming write is written to the next block when close to the block boundary
    forwardIndexProbability: <float>
    # Threshold for forward writes, as a fraction of the given namespace's bufferFuture
    forwardIndexThreshold: <float>
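  # As a brief illustration, the regexp limits set to the documented defaults
  # (values restate the defaults above, not recommendations):
  #   index:
  #     regexpDFALimit: 10000
  #     regexpFSALimit: 10000000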
  # Configuration options to transform incoming writes
  transforms:
    # Truncation type applied to incoming writes, valid options: [none, block]
    # none = No truncation occurs
    # block = Truncates incoming writes to the block boundary preceding this point's timestamp
    truncateBy: <string>
    # What to set all incoming write values to
    forceValue: <float>
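  # For example (hypothetical values), truncating writes to block boundaries
  # while forcing every value to 1:
  #   transforms:
  #     truncateBy: block
  #     forceValue: 1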
  # Logging configuration
  logging:
    # Log file location
    file: <string>
    # Minimum log level emitted
    level: <string>
    # Key-value pairs to send to logging
    fields: <map_of_strings>
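  # An illustrative logging block; the file path and fields are hypothetical:
  #   logging:
  #     file: /var/log/m3dbnode.log
  #     level: info
  #     fields:
  #       service: m3dbnode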

  # Options for emitting metrics
  metrics:
    # Metrics scope
    scope:
      # Prefix prepended to metrics collected
      prefix: <string>
      # Reporting frequency of metrics collected
      reportingInterval: <duration>
      # Tags shared by metrics collected
      tags: <map_of_strings>
    # Configuration for a Prometheus reporter (if used)
    prometheus:
      # Metrics collection endpoint for application
      # Default = "/metrics"
      handlerPath: <string>
      # Listen address for metrics
      # Default = "0.0.0.0:7203"
      listenAddress: <url>
      # The default Prometheus type to use for Tally timers, valid options: [histogram, summary]
      timerType: <string>
      # Sets the default histogram buckets used by the reporter
      defaultHistogramBuckets:
        # Upper bound of the bucket
        upper: <float>
      # Sets the default summary objectives used by the reporter
      defaultSummaryObjectives:
        percentile: <float>
        allowedError: <float>
      # What to do with errors when listening on the specified listen address or registering a metric with Prometheus, valid options: [stderr, log, none]
      # Default = panic and stop execution of the goroutine
      onError: <string>
    # Metric sanitization type, valid options: [none, m3, prometheus]
    # Default = "none"
    sanitization: <string>
    # Metrics sampling rate. min=0.0, max=1.0
    samplingRate: <float>
    # Enable Go runtime metrics, valid options: [none, simple, moderate, detailed]
    # See https://github.com/m3db/m3/blob/master/src/x/instrument/extended.go#L39:L64 for more details
    extended: <string>
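  # A sketch of a metrics block using the documented Prometheus defaults; the
  # sanitization, samplingRate, and extended values are illustrative:
  #   metrics:
  #     prometheus:
  #       handlerPath: /metrics
  #       listenAddress: 0.0.0.0:7203
  #     sanitization: prometheus
  #     samplingRate: 1.0
  #     extended: simple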

  # Host and port to listen for the node service
  # Default = "0.0.0.0:9000"
  listenAddress: <url>
  # Host and port to listen for the cluster service
  # Default = "0.0.0.0:9001"
  clusterListenAddress: <url>
  # Host and port to listen for the node json/http APIs (Primarily used for debugging)
  # Default = "0.0.0.0:9002"  
  httpNodeListenAddress: <url>
  # Host and port to listen for the cluster json/http APIs (Primarily used for debugging)
  # Default = "0.0.0.0:9003"  
  httpClusterListenAddress: <url>
  # Address to listen on for debug APIs (pprof, etc).
  # Default = "0.0.0.0:9004"
  debugListenAddress: <url>

  # Configuration for resolving the instance's host ID.
  hostID:
    # Resolver used to match the host ID, valid options: [hostname, config, environment, file]
    resolver: <string>
    # If using "environment" resolver, is the environment variable specified host ID
    envVarName: <string>
    # If using "file" resolver, is the file path to specified host ID
    file:
      # Path to file
      path: <string>
      # Timeout to wait for file
      timeout: <duration>
    # If using "config" resolver, is the config specified host ID
    # Default = "m3db_local"
    value: <string>
    # If using "hostname" resolver, is the hostname of the specified host
    hostname:
      # Custom format to use, specified as a Go template.
      format: <string>
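  # For example, resolving the host ID from an environment variable;
  # M3DB_HOST_ID is a hypothetical variable name:
  #   hostID:
  #     resolver: environment
  #     envVarName: M3DB_HOST_ID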

  client:
    # The consistency level for writing to a cluster, valid options: [none, one, majority, all]
    writeConsistencyLevel: <string>
    # The consistency level for reading from a cluster, valid options: [none, one, unstrict_majority, majority, unstrict_all, all]
    readConsistencyLevel: <string>
    # The timeout for writing data
    writeTimeout: <duration>
    # The fetch timeout for any given query
    # Range = 30s to 5m
    fetchTimeout: <duration>
    # The cluster connect timeout    
    connectTimeout: <duration>
    # Configuration for retrying write operations
    writeRetry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Add randomness to wait intervals
      jitter: <bool>
    # Configuration for retrying fetch operations
    fetchRetry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Add randomness to wait intervals
      jitter: <bool>
    # The number of times a background health check must fail before a connection is taken out of consideration
    backgroundHealthCheckFailLimit: <int>
    # Multiple of the host connect time to sleep between a failed health check and the next check
    backgroundHealthCheckFailThrottleFactor: <float>
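  # A hedged client sketch; the timeouts and retry settings are illustrative,
  # not recommendations:
  #   client:
  #     writeConsistencyLevel: majority
  #     readConsistencyLevel: unstrict_majority
  #     writeTimeout: 10s
  #     fetchTimeout: 30s
  #     connectTimeout: 20s
  #     writeRetry:
  #       initialBackoff: 500ms
  #       backoffFactor: 2.0
  #       maxBackoff: 10s
  #       maxRetries: 2
  #       jitter: true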

  # Initial garbage collection target percentage
  # Range = 0 to 100
  gcPercentage: <int>

  # Tick configuration for background processing of block rotation
  tick:
    # Batch size to process series together during a tick
    seriesBatchSize: <int>
    # Per-series sleep at the completion of each tick batch
    perSeriesSleepDuration: <duration>
    # Minimum tick interval for the node
    minimumInterval: <duration>
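  # Illustrative tick tuning (all values hypothetical):
  #   tick:
  #     seriesBatchSize: 512
  #     perSeriesSleepDuration: 100us
  #     minimumInterval: 10s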
  # Write new series asynchronously for fast ingestion of new ID bursts
  writeNewSeriesAsync: <bool>
  # Write new series backoff between batches of new series insertions
  writeNewSeriesBackoffDuration: <duration>

  # Config for node bootstrappers
  bootstrap:
    # Bootstrap mode, valid options: [default, prefer_peers, exclude_commitlog]
    mode: <string>
    # File system bootstrapper
    filesystem:
      # What version, if any, to migrate existing data filesets to
      migration:
        # Upgrade filesets to specified version
        targetMigrationVersion: <int>
        # Number of concurrent workers performing migration
        concurrency: <int>
    # Configuration for commitlog bootstrapper 
    commitlog:
      # Return unfulfilled when encountering corrupt commit log files
      returnUnfulfilledForCorruptCommitLogFiles: <bool>
    # Config for peers bootstrapper
    peers:
      # How many shards in parallel to stream in memory data between peers
      # Default = numCPU
      streamShardConcurrency: <int>
      # How many shards in parallel to stream for historical data streamed between peers
      # Default = numCPU / 2
      streamPersistShardConcurrency: <int>
      # Controls how many shards in parallel to flush for historical data streamed between peers
      # Default = 1
      streamPersistShardFlushConcurrency: <int>
    # Whether individual bootstrappers cache series metadata across all namespaces, shards, or blocks
    cacheSeriesMetadata: <bool>
    # Concurrency for building index segments
    indexSegmentConcurrency: <int>
    # Verification checks to enable during a bootstrap
    verify:
      verifyIndexSegments: <bool>
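  # For example (illustrative, not a recommendation), preferring peer streaming
  # and tolerating corrupt commit log files:
  #   bootstrap:
  #     mode: prefer_peers
  #     commitlog:
  #       returnUnfulfilledForCorruptCommitLogFiles: false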

  # Block retrieval policy
  blockRetrieve:
    # Concurrency to fetch blocks from disk
    fetchConcurrency: <int>
    # Globally enables/disables callbacks used to cache blocks fetched from disk
    cacheBlocksOnRetrieve: <bool>
    
  # Caching policy for database blocks
  cache:
    # Series cache policy
    series:
      # Policy to use, valid options: [none, all, recently_read, lru]
      policy: <string>
      # If using LRU policy
      lru:
        maxBlocks: <int>
        eventsChannelSize: <int>
    # PostingsList cache policy
    postingsList:
      size: <int>
      cacheRegexp: <bool>
      cacheTerms: <bool>
    # Compiled regexp cache for query regexp
    regexp:
      size: <int>
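  # An illustrative caching block; the sizes are hypothetical:
  #   cache:
  #     series:
  #       policy: lru
  #       lru:
  #         maxBlocks: 500000
  #         eventsChannelSize: 65536
  #     postingsList:
  #       size: 262144
  #       cacheRegexp: true
  #       cacheTerms: true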

  # Commit log configuration
  commitlog:
    # Maximum number of bytes buffered before flushing the commitlog
    flushMaxBytes: <int>
    # Maximum amount of time data can remain buffered before flushing the commitlog
    flushEvery: <duration>
    # Configuration for the commitlog queue. High throughput setups may require higher
    # values. Higher values use more memory.
    queue:
      # How the queue size is calculated, valid options: [fixed, percpu]
      calculationType: <string>
      size: <int>
    # Configuration for the Golang channel that implements the commit log queue
    queueChannel:
      # How the channel size is calculated, valid options: [fixed, percpu]
      calculationType: <string>
      size: <int>
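  # For instance (sizes are illustrative):
  #   commitlog:
  #     flushMaxBytes: 524288
  #     flushEvery: 1s
  #     queue:
  #       calculationType: fixed
  #       size: 2097152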

  # Configuration for node filesystem
  filesystem:
    # Directory to store M3DB data in
    filePathPrefix: <string>
    # Write buffer size
    writeBufferSize: <int>
    # Data read buffer size
    dataReadBufferSize: <int>
    # Info metadata file read buffer size
    infoReadBufferSize: <int>
    # Seek data read buffer size
    seekReadBufferSize: <int>
    # Disk flush throughput limit in Mb/s
    throughputLimitMbps: <float>
    # Disk flush throughput check interval
    throughputCheckEvery: <int>
    # New file permissions mode to use when creating files, specified as three digits
    newFileMode: <string>
    # New directory permissions mode to use when creating directories, specified as three digits
    newDirectoryMode: <string>
    # mmap configuration
    mmap:
      # Huge pages config (Linux only)
      hugeTLB:
        enabled: <bool>
        # Threshold on which to use huge TLB
        threshold: <int>
    # Forces the mmap that stores the index lookup bytes to be an anonymous region in memory
    force_index_summaries_mmap_memory: <bool>
    # Forces the mmap that stores the bloom filter bytes to be an anonymous region in memory
    force_bloom_filter_mmap_memory: <bool>
    # Target false positive percentage for the bloom filters for the fileset files
    bloomFilterFalsePositivePercent: <float>
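  # A minimal filesystem sketch; the path, limit, and modes are illustrative:
  #   filesystem:
  #     filePathPrefix: /var/lib/m3db
  #     throughputLimitMbps: 100.0
  #     newFileMode: "0666"
  #     newDirectoryMode: "0755"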

  # Policy for replicating data between clusters
  replication:
    # Clusters to replicate data from
    clusters:
      # Cluster name
    - name: <string>
      # Configuration used to construct a client
      client:
        # Environment configuration for discovering the remote cluster (sub-options not shown)
        config:
        # Consistency level for writing to a cluster, valid options: [none, one, majority, all]
        writeConsistencyLevel: <string>
        # Consistency level for reading from a cluster, valid options: [none, one, unstrict_majority, majority, unstrict_all, all]
        readConsistencyLevel: <string>
        # Consistency level for connecting to a cluster, valid options: [unknown, any, all, unstrict_all, one, none, majority, unstrict_majority]
        connectConsistencyLevel: <string>
        # The timeout for writing data
        writeTimeout: <duration>
        # The fetch timeout for any given query
        # Range = 30s to 5m
        fetchTimeout: <duration>
        # The cluster connect timeout    
        connectTimeout: <duration>
        # Configuration for retrying write operations
        writeRetry:
          initialBackoff: <duration>
          # Factor for exponential backoff
          backoffFactor: <float>
          # Maximum backoff time
          maxBackoff: <duration>
          # Maximum retry attempts
          maxRetries: <int>
          # Add randomness to wait intervals
          jitter: <bool>
        # Configuration for retrying fetch operations
        fetchRetry:
          initialBackoff: <duration>
          # Factor for exponential backoff
          backoffFactor: <float>
          # Maximum backoff time
          maxBackoff: <duration>
          # Maximum retry attempts
          maxRetries: <int>
          # Add randomness to wait intervals
          jitter: <bool>
        # Log error sample rate
        logErrorSampleRate: <float>
        # Log host write error sample rate
        logHostWriteErrorSampleRate: <float>
        # Log host fetch error sample rate
        logHostFetchErrorSampleRate: <float>
        # The number of times a background health check must fail before a connection is taken out of consideration
        backgroundHealthCheckFailLimit: <int>
        # Multiple of the host connect time to sleep between a failed health check and the next check
        backgroundHealthCheckFailThrottleFactor: <float>
        # Hashing of IDs to shards configuration
        hashing:
          # Murmur32 seed value
          seed: <int>
        # Configuration specific to running in ProtoDataMode
        proto:
          # Enable proto mode
          enabled: <bool>
          # Load user schema from client configuration into schema registry at startup/initialization time
          schema_registry:
            messageName: <string>
            schemaDeployID: <string>
            schemaFilePath: <string>
        # Worker pool size for async write requests
        asyncWriteWorkerPoolSize: <int>
        # Maximum concurrency for async write requests
        asyncWriteMaxConcurrency: <int>
        # Offsets all writes by specified duration into the past
        writeTimestampOffset: <duration>
        # Sets the number of blocks to retrieve in a single batch from the remote peer
        # Default = 4096
        fetchSeriesBlocksBatchSize: <int>
        # Whether or not to write to shards that are initializing
        # Default = true
        writeShardsInitializing: <bool>
        # Whether or not writes to leaving shards count towards consistency
        # Default = false
        # NOTE: shardsLeavingCountTowardsConsistency and shardsLeavingAndInitializingCountTowardsConsistency both
        # cannot be true
        shardsLeavingCountTowardsConsistency: <bool>
        # Whether or not writes to both leaving and initializing shards as pair counts toward consistency
        # Default = false
        # NOTE: shardsLeavingCountTowardsConsistency and shardsLeavingAndInitializingCountTowardsConsistency both
        # cannot be true
        shardsLeavingAndInitializingCountTowardsConsistency: <bool>
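  # A sketch of a replication stanza; the cluster name and consistency levels
  # are hypothetical:
  #   replication:
  #     clusters:
  #       - name: remote-dc
  #         client:
  #           writeConsistencyLevel: majority
  #           readConsistencyLevel: unstrict_majority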
  # Specifies the pooling policy
  pooling:
    # Initial alloc size for a block
    blockAllocSize: <int>
    # Thrift bytes pool max bytes slice allocation for a single binary field
    thriftBytesPoolAllocSize: <int>
    # General pool type, valid options: [simple]
    type: <string>
    # Bucketized pool configuration
    bytesPool:
      # Configuration for buckets in a pool
      buckets:
        # Number of items in the bucket
      - count: <int>
        # Capacity of each item in the bucket
        capacity: <int>
      watermark:
        # The low watermark to start refilling the pool
        # min=0.0, max=1.0
        low: <float>
        # The high watermark to start refilling the pool
        # min=0.0, max=1.0
        high: <float>
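  # Illustrative pooling buckets and watermarks (sizes are hypothetical):
  #   pooling:
  #     type: simple
  #     bytesPool:
  #       buckets:
  #         - count: 4096
  #           capacity: 128
  #       watermark:
  #         low: 0.7
  #         high: 1.0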
  hashing:
    # Murmur32 seed value
    seed: <int>
  # Configuration specific to running in ProtoDataMode
  proto:
    # Enable proto mode
    enabled: <bool>
    # Load user schema from client configuration into schema registry at startup/initialization time
    schema_registry:
      messageName: <string>
      schemaDeployID: <string>
      schemaFilePath: <string>
  # Enables tracing; if not configured, tracing is disabled
  tracing:
    # Name for tracing service
    serviceName: <string>
    # Tracing backend to use, valid options: [jaeger, lightstep]
    backend: <string>
    # If using Jaeger, options to send to tracing backend
    jaeger:
      # Service name to use
      serviceName: <string>
      # Disable jaeger tracing
      disabled: <bool>
      # Enable RPC metrics
      rpc_metrics: <bool>
      # Tags to send to tracing backend as key/value pairs
      tags: <array_of_strings>
      # Jaeger sampling configuration
      sampler:
        # Sampler type, valid options: [const, probabilistic, rateLimiting, remote]
        type: <string>
        # Value passed to sampler
        # For "const" sampler, 0 or 1 for always false/true respectively
        # For "probabilistic" sampler, a probability between 0 and 1
        # For "rateLimiting" sampler, the number of spans per second
        # For "remote" sampler, param is the same as for "probabilistic" and indicates the initial sampling rate before the actual rate
        param: <float>
        # URL of sampling manager that can provide sampling strategy to this service
        samplingServerURL: <string>
        # How often the remotely controlled sampler polls sampling manager for the appropriate sampling strategy
        samplingRefreshInterval: <duration>
        # Maximum number of operations that the PerOperationSampler keeps track of
        maxOperations: <int>
        # For applications that require late binding of span name via explicit call to SetOperationName when using PerOperationSampler
        operationNameLateBinding: <bool>
      # Jaeger sampling configuration
      reporter:
        # How many spans the reporter can keep in memory before it starts dropping new spans
        queueSize: <int>
        # Enable LoggingReporter that runs in parallel with the main reporter and logs all submitted spans
        logSpans: <bool>
        # Instructs reporter to send spans to jaeger-agent at this address
        localAgentHostPort: <string>
        # Disables udp connection helper that periodically re-resolves the agent's hostname and reconnects if there was a change
        disableAttemptReconnecting: <bool>
        # Send spans to jaeger-collector at this URL
        collectorEndpoint: <url>
        # Include a user for basic http authentication when sending spans to jaeger-collector
        user: <string>
        # Include a password for basic http authentication when sending spans to jaeger collector
        password: <string>
        # Add these headers to the http request when reporting spans
        http_headers: <array_of_strings>
      # Values for the header keys that Jaeger uses
      headers:
        # Forces the trace to be sampled as "debug" trace
        jaegerDebugHeader: <string>
        # Submit baggage in this header when a root span does not exist
        jaegerBaggageHeader: <string>
        # Use to propagate tracing context
        TraceContextHeaderName: <string>
        # Use to propagate baggage, must be lower-case
        traceBaggageHeaderPrefix: <string>
      # Baggage restrictions manager to whitelist certain baggage keys
      baggage_restrictions:
        # Startup failure mode to deny or allow writing of baggage before restrictions are retrieved from the Jaeger agent
        denyBaggageOnInitializationFailure: <bool>
        # hostPort of jaeger-agent's baggage restrictions server
        hostPort: <string>
        # How often the baggage restriction manager polls jaeger-agent for the most recent baggage restrictions
        refreshInterval: <duration>
      # Configures the throttler for the rate at which the client may send debug requests
      throttler:
        # Host port of jaeger-agent's credit server
        hostPort: <string>
        # How often the throttler polls jaeger-agent for more throttling credits
        refreshInterval: <duration>
        # Should throttler synchronously fetch credits from the agent when an operation is seen for the first time
        synchronousInitialization: <bool>
    # If using Lightstep, options to send to tracing backend
    lightstep:
      # API key for your LightStep project
      access_token: <string>
      # Host, port, and plaintext option to use for the collector
      collector: <url>
      # Tags to send to tracing backend as key/value pairs
      tags: <array_of_strings>
      # Host, port, and plaintext option to use for the LightStep web API
      lightstep_api: <url>
      # Maximum number of spans buffered before sending them to a collector
      max_buffered_spans: <int>
      # Maximum allowable size (in characters) of an OpenTracing logging key
      max_log_key_len: <int>
      # Maximum allowable size (in characters) of an OpenTracing logging value. Longer values are truncated. Only applies to variable-length value types (strings, interface{}, etc)
      max_log_value_len: <int>
      # Limits the number of logs in a single span
      max_logs_per_span: <int>
      # Limits the size in bytes of grpc messages sent by a client
      grpc_max_call_send_msg_size_bytes: <int>
      # Maximum duration of time between sending spans to a collector
      reporting_period: <duration>
      # Minimum duration of time between sending spans to a collector
      min_reporting_period: <duration>
      # Timeout of sending spans to collector
      report_timeout: <duration>
      # Turn log events on all Spans into no-ops
      drop_span_logs: <bool>
      # Force use of HTTP connections
      use_http: <bool>
      # Force use of gRPC connection
      usegrpc: <bool>
      # Reconnect timeout for connections to collector
      reconnect_period: <duration>
      # Enable Lightstep meta event logging
      meta_event_reporting_enabled: <bool>
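  # A compact tracing sketch using Jaeger; the sampler and agent address are
  # illustrative (6831 is the conventional jaeger-agent UDP port):
  #   tracing:
  #     backend: jaeger
  #     jaeger:
  #       sampler:
  #         type: const
  #         param: 1
  #       reporter:
  #         localAgentHostPort: 127.0.0.1:6831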
  # Configuration of database resource limits
  limits:
    # Upper limit on time series bytes read from disk within a given lookback period
    maxRecentlyQueriedSeriesDiskBytesRead:
      # Max value for the limit
      value: <int>
      # The period in which a resource limit is enforced
      lookback: <duration>
    # Upper limit on time series blocks count within a given lookback period
    maxRecentlyQueriedSeriesBlocks:
      # Max value for the limit
      value: <int>
      # The period in which a resource limit is enforced
      lookback: <duration>
    # Maximum number of outstanding write requests that the server allows before it begins rejecting requests
    maxOutstandingWriteRequests: <int>
    # Maximum number of outstanding read requests that the server allows before it begins rejecting requests
    maxOutstandingReadRequests: <int>
    # Maximum number of bytes that can be loaded into memory as part of the repair process
    maxOutstandingRepairedBytes: <int>
    # Maximum number of encoders permitted in a block
    maxEncodersPerBlock: <int>
    # Write new series limit per second to limit overwhelming during new ID bursts
    writeNewSeriesPerSecond: <int>
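  # For example (illustrative values), bounding disk reads over a 15s lookback
  # and capping outstanding writes:
  #   limits:
  #     maxRecentlyQueriedSeriesDiskBytesRead:
  #       value: 1000000000
  #       lookback: 15s
  #     maxOutstandingWriteRequests: 10000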
  # Configuration for wide operations that differ from regular paths by optimizing for query completeness across arbitrary query ranges rather than speed.
  wide:
    # Batch size for wide operations. This corresponds to how many series are processed within a single "chunk"
    # Larger batch sizes complete the query faster, but increase memory consumption
    batchSize: <int>
  # TChannel configuration
  tchannel:
    # Maximum idle time
    maxIdleTime: <duration>
    # Idle check interval
    idleCheckInterval: <duration>
  # Debug configuration
  debug:
    # Sets runtime.SetMutexProfileFraction to report mutex contention events
    # See https://tip.golang.org/pkg/runtime/#SetMutexProfileFraction for more details about the value
    mutexProfileFraction: <int>
    # Sets runtime.BlockProfileRate to report blocking events
    # See https://golang.org/pkg/runtime/#SetBlockProfileRate for more details about the value.    
    blockProfileRate: <int>
  # Enable cold writes for all namespaces
  forceColdWritesEnabled: <bool>
  # Discovery configuration (how the node discovers its cluster topology, typically via etcd)
  discovery:
    # The type of discovery configuration used, valid options: [config, m3db_single_node, m3db_cluster, m3aggregator_cluster]
    type: <string>
    # Defines M3DB discovery via etcd
    m3dbCluster:
      env: <string>
      zone: <string>
      endpoints: <array_of_strings>
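  # For instance, discovering an existing M3DB cluster via etcd; the
  # environment, zone, and endpoint are hypothetical:
  #   discovery:
  #     type: m3db_cluster
  #     m3dbCluster:
  #       env: default_env
  #       zone: embedded
  #       endpoints:
  #         - http://etcd01:2379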