# Rustelo/config/features/metrics/example.toml

# Metrics Feature Configuration - Example Environment
# Complete documentation of all metrics and monitoring options

# Feature flag for the metrics feature; the detailed settings live in the
# [metrics] table and its sub-tables.
# NOTE(review): how this flag interacts with metrics.enabled is not visible
# in this file - confirm against the config loader.
[features]
metrics = true

# Top-level metrics settings; backend- and category-specific options live in
# the [metrics.*] sub-tables.
# NOTE(review): authentication_required is false here but true in
# [metrics.security] - confirm which of the two the loader honours.
[metrics]
enabled = true
endpoint = "/metrics"                     # Metrics endpoint path
format = "prometheus"                     # Metrics format: prometheus, json, influxdb
namespace = "rustelo"                     # Metrics namespace
subsystem = "app"                         # Metrics subsystem
collect_interval = 15                     # Collection interval in seconds
export_interval = 30                      # Export interval in seconds
retention_days = 30                       # Metrics retention period in days
buffer_size = 10000                       # Metrics buffer size (unit not stated; presumably entries - TODO confirm)
flush_interval = 5                        # Buffer flush interval in seconds
compression = true                        # Enable metrics compression
authentication_required = false           # Require authentication for metrics endpoint

# Prometheus backend settings: where metrics are exposed, the labels attached
# to every metric, and the default histogram/summary shapes.
[metrics.prometheus]
# Enable Prometheus metrics
enabled = true
# Prometheus server port
port = 9090
# Prometheus metrics path
path = "/metrics"
# Prometheus registry name
registry = "default"

# Default labels for all metrics
default_labels.service = "rustelo"
default_labels.version = "1.0.0"
default_labels.environment = "production"

# Default histogram buckets
histogram_buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]

# Summary quantiles, each paired with its error value
summary_objectives = [
    { quantile = 0.5, error = 0.05 },
    { quantile = 0.9, error = 0.01 },
    { quantile = 0.99, error = 0.001 },
]

# Maximum age for summary metrics (unit not stated; presumably seconds - TODO confirm)
max_age = 600
# Number of age buckets for summaries
age_buckets = 5

# InfluxDB backend settings (disabled in this file): connection details,
# target database/measurement, and write batching.
# NOTE(review): username/password are placeholder credentials. TOML performs
# no environment-variable expansion itself, so supplying the real secret from
# the environment depends on the consuming application - confirm the mechanism.
[metrics.influxdb]
enabled = false                           # Enable InfluxDB metrics
host = "localhost"                        # InfluxDB host
port = 8086                               # InfluxDB port
database = "rustelo_metrics"              # InfluxDB database name
username = "admin"                        # InfluxDB username
password = "password"                     # InfluxDB password (placeholder; use env var)
measurement = "application_metrics"       # InfluxDB measurement name
retention_policy = "autogen"              # InfluxDB retention policy
precision = "s"                           # Timestamp precision: s, ms, us, ns
timeout = 10                              # Connection timeout in seconds
max_retries = 3                           # Maximum retry attempts
batch_size = 1000                         # Batch size for writing points
flush_interval = 10                       # Flush interval in seconds

# StatsD backend settings (disabled in this file): target host/port, metric
# prefix, tag handling, and buffering.
[metrics.statsd]
enabled = false                           # Enable StatsD metrics
host = "localhost"                        # StatsD host
port = 8125                               # StatsD port
prefix = "rustelo"                        # Metrics prefix
tags_enabled = true                       # Enable tags support
tag_format = "datadog"                    # Tag format: datadog, influxdb
buffer_size = 1000                        # Buffer size for metrics (unit not stated; presumably entries - TODO confirm)
flush_interval = 5                        # Flush interval in seconds
connection_timeout = 5                    # Connection timeout in seconds

# Host-level metrics: one boolean switch per resource category, plus the
# interval at which they are collected.
[metrics.system]
enabled = true                            # Collect system metrics
cpu_usage = true                          # Monitor CPU usage
memory_usage = true                       # Monitor memory usage
disk_usage = true                         # Monitor disk usage
disk_io = true                            # Monitor disk I/O
network_usage = true                      # Monitor network usage
network_io = true                         # Monitor network I/O
load_average = true                       # Monitor load average
open_files = true                         # Monitor open file descriptors
processes = true                          # Monitor process count
uptime = true                             # Monitor system uptime
collection_interval = 30                  # System metrics collection interval (presumably seconds, like collect_interval in [metrics] - TODO confirm)

# Application-level metrics: one boolean switch per metric category.
# NOTE(review): how these switches relate to the more detailed
# [metrics.http], [metrics.database] and [metrics.cache] tables is not
# visible in this file - confirm against the config loader.
[metrics.application]
enabled = true                            # Collect application metrics
request_metrics = true                    # HTTP request metrics
response_metrics = true                   # HTTP response metrics
database_metrics = true                   # Database query metrics
cache_metrics = true                      # Cache hit/miss metrics
error_metrics = true                      # Error rate metrics
performance_metrics = true                # Performance metrics
memory_metrics = true                     # Application memory metrics
gc_metrics = true                         # Garbage collection metrics
thread_metrics = true                     # Thread pool metrics
connection_metrics = true                 # Connection pool metrics

# HTTP metrics: which request/response attributes are tracked, which paths
# are excluded, and the slow-request threshold.
# User-agent and remote-address tracking are switched off in this file.
[metrics.http]
enabled = true                            # Enable HTTP metrics
track_requests = true                     # Track HTTP requests
track_responses = true                    # Track HTTP responses
track_duration = true                     # Track request duration
track_size = true                         # Track request/response size
track_status_codes = true                 # Track HTTP status codes
track_methods = true                      # Track HTTP methods
track_paths = true                        # Track request paths
track_user_agents = false                 # Track user agents
track_remote_addr = false                 # Track remote addresses
group_paths = true                        # Group similar paths
exclude_paths = ["/health", "/metrics"]   # Exclude paths from tracking ("/metrics" matches the endpoint value in [metrics])
slow_request_threshold = 1000             # Slow request threshold in milliseconds

# Database metrics: boolean switches per tracked aspect, plus the threshold
# above which a query counts as slow.
[metrics.database]
enabled = true                            # Enable database metrics
track_queries = true                      # Track database queries
track_connections = true                  # Track database connections
track_transactions = true                 # Track database transactions
track_slow_queries = true                 # Track slow queries
slow_query_threshold = 1000               # Slow query threshold in milliseconds
track_query_types = true                  # Track query types (SELECT, INSERT, etc.)
track_tables = true                       # Track table access
track_errors = true                       # Track database errors
connection_pool_metrics = true            # Connection pool metrics
query_cache_metrics = true                # Query cache metrics

# Cache metrics: boolean switches per cache operation and size statistic.
# Per-key metrics are off in this file; the original comment flags them as
# high cardinality.
[metrics.cache]
enabled = true                            # Enable cache metrics
track_hits = true                         # Track cache hits
track_misses = true                       # Track cache misses
track_sets = true                         # Track cache sets
track_gets = true                         # Track cache gets
track_deletes = true                      # Track cache deletes
track_evictions = true                    # Track cache evictions
track_size = true                         # Track cache size
track_memory_usage = true                 # Track cache memory usage
track_ttl = true                          # Track TTL metrics
per_key_metrics = false                   # Track per-key metrics (high cardinality)

# Business metrics: boolean switches per tracked business event category.
# Revenue metrics are the only category switched off in this file.
[metrics.business]
enabled = true                            # Enable business metrics
user_registrations = true                 # Track user registrations
user_logins = true                        # Track user logins
user_activity = true                      # Track user activity
content_created = true                    # Track content creation
content_views = true                      # Track content views
api_usage = true                          # Track API usage
feature_usage = true                      # Track feature usage
conversion_metrics = true                 # Track conversion metrics
revenue_metrics = false                   # Track revenue metrics
custom_events = true                      # Track custom events

# Alerting configuration: notification channels plus the list of alert rules.
# NOTE(review): webhook_url is a placeholder value - replace before use.
[metrics.alerts]
# Enable alerting
enabled = true
# AlertManager URL
alert_manager_url = "http://localhost:9093"
# Webhook URL for alerts
webhook_url = "https://hooks.slack.com/services/xxx"
# Enable email notifications
email_notifications = true
# Enable Slack notifications
slack_notifications = true
# Enable PagerDuty notifications
pagerduty_notifications = false

# Alert rules, one table per rule: name, condition expression, duration,
# severity, and a human-readable description.
[[metrics.alerts.alert_rules]]
name = "high_error_rate"
condition = "error_rate > 5.0"
duration = "5m"
severity = "warning"
description = "High error rate detected"

[[metrics.alerts.alert_rules]]
name = "high_response_time"
condition = "response_time_p95 > 2000"
duration = "5m"
severity = "warning"
description = "High response time detected"

[[metrics.alerts.alert_rules]]
name = "low_disk_space"
condition = "disk_usage > 90"
duration = "5m"
severity = "critical"
description = "Low disk space"

# Dashboard integration: Grafana connection settings and the dashboards to
# provision.
# NOTE(review): grafana_api_key is a placeholder. TOML performs no
# environment-variable expansion itself, so loading the real key from the
# environment depends on the consuming application - confirm the mechanism.
[metrics.dashboards]
enabled = true                            # Enable dashboard integration
grafana_enabled = true                    # Enable Grafana integration
grafana_url = "http://localhost:3000"     # Grafana URL
grafana_api_key = "your-api-key"         # Grafana API key (placeholder; use env var)
auto_create_dashboards = true             # Auto-create dashboards
dashboard_templates = true                # Use dashboard templates
default_dashboard = "rustelo-overview"    # Default dashboard name
# Custom dashboards: each entry gives a dashboard name and its list of panels.
custom_dashboards = [
    { name = "application-overview", panels = ["cpu", "memory", "requests", "errors"] },
    { name = "database-performance", panels = ["query_time", "connections", "slow_queries"] },
    { name = "diagnostic-dashboard", panels = ["error_rates", "latency", "throughput", "system_health"] }
]

# Exporters: which export formats are switched on, and where/when exported
# files are written.
# The InfluxDB and StatsD exporters are off here, matching enabled = false
# in [metrics.influxdb] and [metrics.statsd].
[metrics.exporters]
enabled = true                            # Enable metrics exporters
prometheus_exporter = true                # Enable Prometheus exporter
influxdb_exporter = false                 # Enable InfluxDB exporter
statsd_exporter = false                   # Enable StatsD exporter
json_exporter = true                      # Enable JSON exporter
csv_exporter = false                      # Enable CSV exporter
export_directory = "exports"              # Directory for exported metrics (relative path; base directory not stated here)
export_schedule = "0 0 * * *"            # Export schedule, cron syntax (daily at midnight)
export_retention = 30                     # Export file retention in days
export_compression = true                 # Compress exported files

# Sampling: a global sample rate and cardinality limit, plus per-pattern
# overrides.
[metrics.sampling]
# Enable metrics sampling
enabled = true
# Sample rate (0.0 to 1.0)
sample_rate = 1.0
# High cardinality limit
high_cardinality_limit = 10000
# Enable adaptive sampling
adaptive_sampling = true
# Sampling strategies: each entry gives a metric name pattern with its own
# sample rate and maximum cardinality.
sampling_strategies = [
    { metric_pattern = "http_requests_*", sample_rate = 0.1, max_cardinality = 1000 },
    { metric_pattern = "database_queries_*", sample_rate = 0.05, max_cardinality = 500 },
]

# Access control for the metrics endpoint: authentication, role-based
# authorization, TLS, rate limiting, IP allow-list, and audit logging.
# NOTE(review): authentication_required is true here but false in [metrics] -
# confirm which of the two the loader honours.
# NOTE(review): tls_enabled is true but no certificate or key path appears in
# this table - confirm where TLS material is configured.
[metrics.security]
enabled = true                            # Enable metrics security
authentication_required = true            # Require authentication
authorization_enabled = true              # Enable authorization
allowed_roles = ["admin", "operator"]     # Allowed roles for metrics access
tls_enabled = true                        # Enable TLS for metrics endpoint
client_cert_required = false              # Require client certificates
rate_limiting = true                      # Enable rate limiting
rate_limit_requests = 100                 # Rate limit (requests per minute)
ip_whitelist = ["127.0.0.1", "::1"]      # IP allow-list for metrics access (IPv4 and IPv6 loopback only)
audit_logging = true                      # Enable audit logging

# Performance options for the metrics pipeline: one boolean switch per
# optimization, all switched on in this file.
# NOTE(review): compression also appears in [metrics]; whether the two
# settings are independent is not visible here - confirm.
[metrics.performance]
enabled = true                            # Enable performance optimizations
async_collection = true                   # Enable async metrics collection
batch_processing = true                   # Enable batch processing
memory_optimization = true                # Enable memory optimization
compression = true                        # Enable compression
lazy_initialization = true                # Enable lazy initialization
connection_pooling = true                 # Enable connection pooling
cache_metrics = true                      # Cache computed metrics
parallel_processing = true                # Enable parallel processing
buffer_optimization = true                # Optimize buffer usage
gc_optimization = true                    # Optimize garbage collection

# Debugging aids for the metrics subsystem. Every switch is off in this file;
# the original comment marks the feature as development only.
[metrics.debugging]
enabled = false                           # Enable debugging (development only)
verbose_logging = false                   # Enable verbose logging
debug_endpoint = "/debug/metrics"         # Debug endpoint path
dump_metrics = false                      # Dump metrics to file
trace_collection = false                  # Trace metrics collection
profiling_enabled = false                 # Enable profiling
memory_profiling = false                  # Enable memory profiling
cpu_profiling = false                     # Enable CPU profiling
debug_labels = false                      # Add debug labels to metrics
validation_enabled = false                # Enable metrics validation

# Metrics storage: backend selection, retention, and the backup and cleanup
# schedules (cron syntax).
# retention_policy = "30d" agrees with retention_days = 30 in [metrics].
[metrics.storage]
enabled = true                            # Enable metrics storage
storage_backend = "prometheus"            # Storage backend: prometheus, influxdb, file
storage_path = "metrics_data"             # Storage path for file backend
retention_policy = "30d"                  # Retention policy
compression_enabled = true                # Enable storage compression
backup_enabled = true                     # Enable metrics backup
backup_schedule = "0 2 * * *"            # Backup schedule, cron syntax (daily at 2 AM)
backup_retention = 7                      # Backup retention in days
cleanup_enabled = true                    # Enable automatic cleanup
cleanup_schedule = "0 3 * * *"           # Cleanup schedule, cron syntax (daily at 3 AM)

# External integrations. The table is enabled, but every vendor switch is off
# and all three integration lists are empty in this file.
# NOTE(review): the element schema for the three lists is not shown here -
# confirm against the config loader before adding entries.
[metrics.integrations]
enabled = true                            # Enable external integrations
datadog_enabled = false                   # Enable Datadog integration
newrelic_enabled = false                  # Enable New Relic integration
dynatrace_enabled = false                 # Enable Dynatrace integration
splunk_enabled = false                    # Enable Splunk integration
elastic_enabled = false                   # Enable Elasticsearch integration
custom_integrations = []                  # Custom integration configurations
webhook_integrations = []                 # Webhook integration configurations
api_integrations = []                     # API integration configurations

# Compliance and privacy switches for collected metrics. Consent tracking is
# the only switch turned off in this file.
# NOTE(review): data_retention_policy is a boolean here; the retention period
# it enforces is presumably retention_days in [metrics] or retention_policy in
# [metrics.storage] - confirm.
[metrics.compliance]
enabled = true                            # Enable compliance features
gdpr_compliance = true                    # Enable GDPR compliance
data_anonymization = true                 # Enable data anonymization
pii_scrubbing = true                      # Scrub PII from metrics
audit_trail = true                        # Maintain audit trail
data_retention_policy = true              # Enforce data retention policy
consent_tracking = false                  # Track consent for metrics
right_to_deletion = true                  # Support right to deletion
data_export = true                        # Support data export
privacy_controls = true                   # Enable privacy controls