# Observability Configuration Schema
# Unified schema for centralized logging, metrics, health checks, and tracing
{
  # Contract describing the observability configuration of a service.
  #
  # Every section (logging, metrics, health, tracing, audit) is optional.
  # Fields marked `default` may be overridden by the configuration being
  # validated; fields marked `optional` may be left out entirely.
  ObservabilityConfig = {
    # Enable/disable observability system-wide
    enabled | Bool | default = true,

    # Logging Configuration
    logging | {
      # Enable structured JSON logging
      enabled | Bool | default = true,

      # Log level: debug, info, warn, error
      level | String | default = "info",

      # Log format: json (for Loki ingestion) or pretty (development)
      format | String | default = "json",

      # RUST_LOG environment filter (granular module-level filtering)
      filter | String | optional,

      # Output configuration
      output | {
        # Log output destination: stdout, file, loki
        destination | String | default = "stdout",

        # File path for file output
        file_path | String | optional,

        # Loki endpoint (e.g., http://localhost:3100)
        loki_endpoint | String | optional,

        # Labels to attach to all Loki entries (labels become queryable).
        # Declared as a dictionary of strings: an empty record contract `{ }`
        # is closed and would reject every label a user tries to set.
        loki_labels | { _ : String } | optional,
      } | optional,

      # Structured field configuration
      fields | {
        # Include service name
        service_name | Bool | default = true,

        # Include timestamp (RFC3339)
        timestamp | Bool | default = true,

        # Include log level
        level | Bool | default = true,

        # Include caller location (file:line)
        caller | Bool | default = false,

        # Include span context (trace IDs, span IDs)
        spans | Bool | default = true,

        # Custom metadata fields.
        # Declared as a dictionary with arbitrary values: an empty record
        # contract `{ }` is closed and would reject every custom field.
        custom | { _ : Dyn } | optional,
      } | optional,

      # Performance optimization
      sampling | {
        # Enable log sampling to reduce volume
        enabled | Bool | default = false,

        # Sample 1 in N log entries
        rate | Number | optional,
      } | optional,
    } | optional,

    # Metrics Configuration (Prometheus)
    metrics | {
      # Enable metrics collection
      enabled | Bool | default = true,

      # Exporter backend: prometheus (default), otlp
      exporter | String | default = "prometheus",

      # Prometheus scrape endpoint path
      prometheus_path | String | default = "/metrics",

      # Metrics collection interval (seconds)
      interval | Number | default = 60,

      # Histogram buckets for request latency (milliseconds)
      histogram_buckets | Array Number | default = [1, 5, 10, 50, 100, 500, 1000, 5000],

      # Cardinality limits (prevent unbounded growth)
      max_cardinality | Number | optional,

      # Metric retention period (hours)
      retention_hours | Number | optional,

      # OpenTelemetry push endpoint (if using OTLP)
      otlp_endpoint | String | optional,

      # OTLP push interval (seconds)
      otlp_interval | Number | optional,
    } | optional,

    # Health Check Configuration
    health | {
      # Enable health check endpoints
      enabled | Bool | default = true,

      # Health check HTTP server port
      port | Number | default = 8081,

      # Liveness probe endpoint
      liveness_path | String | default = "/healthz",

      # Readiness probe endpoint (depends on dependencies)
      readiness_path | String | default = "/ready",

      # Startup probe endpoint
      startup_path | String | default = "/startup",

      # Health check probe interval (seconds)
      interval | Number | default = 10,

      # Probe timeout (milliseconds)
      timeout | Number | default = 5000,

      # Number of consecutive successes to mark as healthy
      success_threshold | Number | default = 1,

      # Number of consecutive failures to mark as unhealthy
      failure_threshold | Number | default = 3,

      # Initial delay before first check (seconds)
      initial_delay | Number | default = 0,
    } | optional,

    # Distributed Tracing Configuration (OpenTelemetry)
    tracing | {
      # Enable distributed tracing
      enabled | Bool | default = false,

      # Tracer backend: otlp (OpenTelemetry)
      backend | String | default = "otlp",

      # OpenTelemetry Collector endpoint (gRPC)
      otlp_endpoint | String | optional,

      # Trace sampler: always, never, parentbased
      sampler | String | default = "parentbased",

      # Sampling rate (0.0 to 1.0) for parentbased/probability samplers
      sampling_rate | Number | optional,

      # Service version
      service_version | String | optional,

      # Environment name (dev, staging, production)
      environment | String | optional,
    } | optional,

    # Audit Logging Configuration
    audit | {
      # Enable workspace operation auditing
      enabled | Bool | default = true,

      # Storage backend: file, siem
      storage | String | default = "file",

      # Audit log file directory
      log_directory | String | optional,

      # Audit retention period (days)
      retention_days | Number | default = 90,

      # Include PII in audit logs (GDPR consideration)
      include_pii | Bool | default = false,

      # Export format(s): jsonl, csv, splunk, elastic
      export_formats | Array String | default = ["jsonl"],

      # SIEM endpoint (e.g., Splunk, Elastic) for real-time export
      siem_endpoint | String | optional,

      # Workspace operation tracking
      track_workspace_operations | Bool | default = true,

      # Tracked operations: create, delete, update, switch, list, sync
      workspace_operations | Array String | default = ["create", "delete", "update", "switch", "list", "sync"],
    } | optional,
  },
}
|