# Metrics Feature Configuration - Example Environment
# Complete documentation of all metrics and monitoring options
#
# Layout: one table per concern under [metrics.*]. Each table has its own
# `enabled` switch. Trailing comments give units and accepted values.

[features]
metrics = true

[metrics]
enabled = true
endpoint = "/metrics"  # Metrics endpoint path
format = "prometheus"  # Metrics format: prometheus, json, influxdb
namespace = "rustelo"  # Metrics namespace
subsystem = "app"  # Metrics subsystem
collect_interval = 15  # Collection interval in seconds
export_interval = 30  # Export interval in seconds
retention_days = 30  # Metrics retention period
buffer_size = 10000  # Metrics buffer size
flush_interval = 5  # Buffer flush interval in seconds
compression = true  # Enable metrics compression
# NOTE(review): [metrics.security] sets authentication_required = true while
# this table sets it to false; confirm which one the application honors.
authentication_required = false  # Require authentication for metrics endpoint

[metrics.prometheus]
enabled = true  # Enable Prometheus metrics
port = 9090  # Prometheus server port
path = "/metrics"  # Prometheus metrics path
registry = "default"  # Prometheus registry name
# Default labels for all metrics
default_labels = { service = "rustelo", version = "1.0.0", environment = "production" }
# Default histogram buckets
histogram_buckets = [
    0.005,
    0.01,
    0.025,
    0.05,
    0.1,
    0.25,
    0.5,
    1.0,
    2.5,
    5.0,
    10.0,
]
# Summary quantiles
summary_objectives = [
    { quantile = 0.5, error = 0.05 },
    { quantile = 0.9, error = 0.01 },
    { quantile = 0.99, error = 0.001 },
]
max_age = 600  # Maximum age for summary metrics
age_buckets = 5  # Number of age buckets for summaries

[metrics.influxdb]
enabled = false  # Enable InfluxDB metrics
host = "localhost"  # InfluxDB host
port = 8086  # InfluxDB port
database = "rustelo_metrics"  # InfluxDB database name
username = "admin"  # InfluxDB username
# Placeholder value. TOML performs no env-var expansion, so the real secret
# has to be injected by the application or deployment tooling.
password = "password"  # InfluxDB password (use env var)
measurement = "application_metrics"  # InfluxDB measurement name
retention_policy = "autogen"  # InfluxDB retention policy
precision = "s"  # Timestamp precision: s, ms, us, ns
timeout = 10  # Connection timeout in seconds
max_retries = 3  # Maximum retry attempts
batch_size = 1000  # Batch size for writing points
flush_interval = 10  # Flush interval in seconds

[metrics.statsd]
enabled = false  # Enable StatsD metrics
host = "localhost"  # StatsD host
port = 8125  # StatsD port
prefix = "rustelo"  # Metrics prefix
tags_enabled = true  # Enable tags support
tag_format = "datadog"  # Tag format: datadog, influxdb
buffer_size = 1000  # Buffer size for metrics
flush_interval = 5  # Flush interval in seconds
connection_timeout = 5  # Connection timeout in seconds

[metrics.system]
enabled = true  # Collect system metrics
cpu_usage = true  # Monitor CPU usage
memory_usage = true  # Monitor memory usage
disk_usage = true  # Monitor disk usage
disk_io = true  # Monitor disk I/O
network_usage = true  # Monitor network usage
network_io = true  # Monitor network I/O
load_average = true  # Monitor load average
open_files = true  # Monitor open file descriptors
processes = true  # Monitor process count
uptime = true  # Monitor system uptime
collection_interval = 30  # System metrics collection interval

[metrics.application]
enabled = true  # Collect application metrics
request_metrics = true  # HTTP request metrics
response_metrics = true  # HTTP response metrics
database_metrics = true  # Database query metrics
cache_metrics = true  # Cache hit/miss metrics
error_metrics = true  # Error rate metrics
performance_metrics = true  # Performance metrics
memory_metrics = true  # Application memory metrics
gc_metrics = true  # Garbage collection metrics
thread_metrics = true  # Thread pool metrics
connection_metrics = true  # Connection pool metrics

[metrics.http]
enabled = true  # Enable HTTP metrics
track_requests = true  # Track HTTP requests
track_responses = true  # Track HTTP responses
track_duration = true  # Track request duration
track_size = true  # Track request/response size
track_status_codes = true  # Track HTTP status codes
track_methods = true  # Track HTTP methods
track_paths = true  # Track request paths
track_user_agents = false  # Track user agents
track_remote_addr = false  # Track remote addresses
group_paths = true  # Group similar paths
exclude_paths = ["/health", "/metrics"]  # Exclude paths from tracking
slow_request_threshold = 1000  # Slow request threshold in milliseconds

[metrics.database]
enabled = true  # Enable database metrics
track_queries = true  # Track database queries
track_connections = true  # Track database connections
track_transactions = true  # Track database transactions
track_slow_queries = true  # Track slow queries
slow_query_threshold = 1000  # Slow query threshold in milliseconds
track_query_types = true  # Track query types (SELECT, INSERT, etc.)
track_tables = true  # Track table access
track_errors = true  # Track database errors
connection_pool_metrics = true  # Connection pool metrics
query_cache_metrics = true  # Query cache metrics

[metrics.cache]
enabled = true  # Enable cache metrics
track_hits = true  # Track cache hits
track_misses = true  # Track cache misses
track_sets = true  # Track cache sets
track_gets = true  # Track cache gets
track_deletes = true  # Track cache deletes
track_evictions = true  # Track cache evictions
track_size = true  # Track cache size
track_memory_usage = true  # Track cache memory usage
track_ttl = true  # Track TTL metrics
per_key_metrics = false  # Track per-key metrics (high cardinality)

[metrics.business]
enabled = true  # Enable business metrics
user_registrations = true  # Track user registrations
user_logins = true  # Track user logins
user_activity = true  # Track user activity
content_created = true  # Track content creation
content_views = true  # Track content views
api_usage = true  # Track API usage
feature_usage = true  # Track feature usage
conversion_metrics = true  # Track conversion metrics
revenue_metrics = false  # Track revenue metrics
custom_events = true  # Track custom events

[metrics.alerts]
enabled = true  # Enable alerting
alert_manager_url = "http://localhost:9093"  # AlertManager URL
webhook_url = "https://hooks.slack.com/services/xxx"  # Webhook URL for alerts
email_notifications = true  # Enable email notifications
slack_notifications = true  # Enable Slack notifications
pagerduty_notifications = false  # Enable PagerDuty notifications
# Each inline table must stay on a single line (TOML 1.0).
alert_rules = [
    { name = "high_error_rate", condition = "error_rate > 5.0", duration = "5m", severity = "warning", description = "High error rate detected" },
    { name = "high_response_time", condition = "response_time_p95 > 2000", duration = "5m", severity = "warning", description = "High response time detected" },
    { name = "low_disk_space", condition = "disk_usage > 90", duration = "5m", severity = "critical", description = "Low disk space" },
]

[metrics.dashboards]
enabled = true  # Enable dashboard integration
grafana_enabled = true  # Enable Grafana integration
grafana_url = "http://localhost:3000"  # Grafana URL
grafana_api_key = "your-api-key"  # Grafana API key (use env var)
auto_create_dashboards = true  # Auto-create dashboards
dashboard_templates = true  # Use dashboard templates
default_dashboard = "rustelo-overview"  # Default dashboard name
custom_dashboards = [
    { name = "application-overview", panels = ["cpu", "memory", "requests", "errors"] },
    { name = "database-performance", panels = ["query_time", "connections", "slow_queries"] },
    { name = "diagnostic-dashboard", panels = ["error_rates", "latency", "throughput", "system_health"] },
]

[metrics.exporters]
enabled = true  # Enable metrics exporters
prometheus_exporter = true  # Enable Prometheus exporter
influxdb_exporter = false  # Enable InfluxDB exporter
statsd_exporter = false  # Enable StatsD exporter
json_exporter = true  # Enable JSON exporter
csv_exporter = false  # Enable CSV exporter
export_directory = "exports"  # Directory for exported metrics
export_schedule = "0 0 * * *"  # Export schedule (daily at midnight)
export_retention = 30  # Export file retention in days
export_compression = true  # Compress exported files

[metrics.sampling]
enabled = true  # Enable metrics sampling
sample_rate = 1.0  # Sample rate (0.0 to 1.0)
high_cardinality_limit = 10000  # High cardinality limit
adaptive_sampling = true  # Enable adaptive sampling
# Sampling strategies
sampling_strategies = [
    { metric_pattern = "http_requests_*", sample_rate = 0.1, max_cardinality = 1000 },
    { metric_pattern = "database_queries_*", sample_rate = 0.05, max_cardinality = 500 },
]

[metrics.security]
enabled = true  # Enable metrics security
# NOTE(review): [metrics] sets authentication_required = false; confirm which
# of the two settings takes precedence.
authentication_required = true  # Require authentication
authorization_enabled = true  # Enable authorization
allowed_roles = ["admin", "operator"]  # Allowed roles for metrics access
tls_enabled = true  # Enable TLS for metrics endpoint
client_cert_required = false  # Require client certificates
rate_limiting = true  # Enable rate limiting
rate_limit_requests = 100  # Rate limit (requests per minute)
ip_whitelist = ["127.0.0.1", "::1"]  # IP whitelist for metrics access
audit_logging = true  # Enable audit logging

[metrics.performance]
enabled = true  # Enable performance optimizations
async_collection = true  # Enable async metrics collection
batch_processing = true  # Enable batch processing
memory_optimization = true  # Enable memory optimization
compression = true  # Enable compression
lazy_initialization = true  # Enable lazy initialization
connection_pooling = true  # Enable connection pooling
cache_metrics = true  # Cache computed metrics
parallel_processing = true  # Enable parallel processing
buffer_optimization = true  # Optimize buffer usage
gc_optimization = true  # Optimize garbage collection

[metrics.debugging]
enabled = false  # Enable debugging (development only)
verbose_logging = false  # Enable verbose logging
debug_endpoint = "/debug/metrics"  # Debug endpoint path
dump_metrics = false  # Dump metrics to file
trace_collection = false  # Trace metrics collection
profiling_enabled = false  # Enable profiling
memory_profiling = false  # Enable memory profiling
cpu_profiling = false  # Enable CPU profiling
debug_labels = false  # Add debug labels to metrics
validation_enabled = false  # Enable metrics validation

[metrics.storage]
enabled = true  # Enable metrics storage
storage_backend = "prometheus"  # Storage backend: prometheus, influxdb, file
storage_path = "metrics_data"  # Storage path for file backend
retention_policy = "30d"  # Retention policy
compression_enabled = true  # Enable storage compression
backup_enabled = true  # Enable metrics backup
backup_schedule = "0 2 * * *"  # Backup schedule (daily at 2 AM)
backup_retention = 7  # Backup retention in days
cleanup_enabled = true  # Enable automatic cleanup
cleanup_schedule = "0 3 * * *"  # Cleanup schedule (daily at 3 AM)

[metrics.integrations]
enabled = true  # Enable external integrations
datadog_enabled = false  # Enable Datadog integration
newrelic_enabled = false  # Enable New Relic integration
dynatrace_enabled = false  # Enable Dynatrace integration
splunk_enabled = false  # Enable Splunk integration
elastic_enabled = false  # Enable Elasticsearch integration
custom_integrations = []  # Custom integration configurations
webhook_integrations = []  # Webhook integration configurations
api_integrations = []  # API integration configurations

[metrics.compliance]
enabled = true  # Enable compliance features
gdpr_compliance = true  # Enable GDPR compliance
data_anonymization = true  # Enable data anonymization
pii_scrubbing = true  # Scrub PII from metrics
audit_trail = true  # Maintain audit trail
data_retention_policy = true  # Enforce data retention policy
consent_tracking = false  # Track consent for metrics
right_to_deletion = true  # Support right to deletion
data_export = true  # Support data export
privacy_controls = true  # Enable privacy controls