# Metrics Feature Configuration - Production Environment
# Settings optimized for production monitoring and performance
#
# NOTE(review): TOML performs no variable expansion, so every "${VAR}" value
# in this file is a literal string as far as the parser is concerned.
# Presumably the application's config loader substitutes environment
# variables before use - confirm.

[features]
metrics = true

# Metrics Configuration - Production
[metrics]
enabled = true
endpoint = "/metrics"
port = 0  # Use main server port
host = "127.0.0.1"
format = "prometheus"
collection_interval = 15  # seconds
retention_period = 86400  # 24 hours
buffer_size = 10000
flush_interval = 30  # seconds

# Prometheus Configuration - Production
[metrics.prometheus]
enabled = true
endpoint = "/metrics"
include_default_metrics = true
include_process_metrics = true
include_runtime_metrics = true
histogram_buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
enable_exemplars = true
exemplar_sample_rate = 0.01

# Prometheus Integration - Production
# Kept in the same table: TOML does not allow a second [metrics.prometheus] header.
server_url = "${PROMETHEUS_URL}"
pushgateway_url = "${PROMETHEUS_PUSHGATEWAY_URL}"
job_name = "rustelo"
instance_name = "${HOSTNAME}"
push_interval = 30  # seconds
basic_auth_username = "${PROMETHEUS_USERNAME}"
basic_auth_password = "${PROMETHEUS_PASSWORD}"

# System Metrics - Production
[metrics.system]
enabled = true
collect_cpu = true
collect_memory = true
collect_disk = true
collect_network = true
collect_load = true
collect_processes = false  # Disabled for performance
collection_interval = 30  # seconds
cpu_percent_precision = 2
memory_usage_threshold = 0.85  # Alert threshold

# HTTP Metrics - Production
[metrics.http]
enabled = true
track_requests = true
track_response_times = true
track_status_codes = true
track_request_size = true
track_response_size = true
track_user_agents = false  # Disabled for privacy
track_ip_addresses = false  # Disabled for privacy
histogram_buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
exclude_paths = ["/health", "/metrics", "/favicon.ico"]

# Database Metrics - Production
[metrics.database]
enabled = true
track_queries = true
track_query_duration = true
track_connection_pool = true
track_slow_queries = true
slow_query_threshold = 500  # milliseconds
track_query_types = true
log_queries = false  # Disabled for performance
track_transactions = true
track_locks = true

# Application Metrics - Production
[metrics.application]
enabled = true
track_business_metrics = true
track_custom_counters = true
track_custom_gauges = true
track_custom_histograms = true
track_feature_usage = true
track_errors = true
track_warnings = false  # Disabled for noise reduction
track_user_actions = true
track_api_usage = true

# Performance Metrics - Production
[metrics.performance]
enabled = true
track_memory_usage = true
track_cpu_usage = true
track_gc_metrics = true
track_thread_metrics = true
track_async_metrics = true
profile_slow_operations = true
profile_threshold = 100  # milliseconds
enable_profiling = false  # Disabled for performance

# Performance Settings - Production
# Kept in the same table: TOML does not allow a second [metrics.performance] header.
async_collection = true
buffer_size = 10000
batch_size = 1000
collection_timeout = 5000  # milliseconds
max_memory_usage = 268_435_456  # 256MB
sampling_rate = 1.0  # 100% sampling
adaptive_sampling = true
high_cardinality_limit = 10000

# Cache Metrics - Production
[metrics.cache]
enabled = true
track_hit_rate = true
track_miss_rate = true
track_eviction_rate = true
track_memory_usage = true
track_operation_times = true
track_key_distribution = false
track_size_distribution = true

# Security Metrics - Production
[metrics.security]
enabled = true
track_failed_logins = true
track_blocked_requests = true
track_rate_limit_hits = true
track_csrf_failures = true
track_auth_events = true
log_security_events = false  # Separate security logging
track_suspicious_activity = true
track_intrusion_attempts = true

# Security Settings - Production
# Kept in the same table: TOML does not allow a second [metrics.security] header.
require_authentication = true
allowed_ips = ["127.0.0.1", "::1"]  # Localhost only
api_key_required = true
api_key_header = "X-Metrics-API-Key"
rate_limit_enabled = true
rate_limit_requests = 100
rate_limit_window = 60  # seconds

# Custom Metrics - Production
[metrics.custom]
enabled = true
user_registrations = true
user_logins = true
content_views = true
api_calls = true
feature_toggles = true
error_rates = true
conversion_rates = true
business_kpis = true

# Alerting - Production
[metrics.alerting]
enabled = true
webhook_url = "${ALERT_WEBHOOK_URL}"
alert_on_high_error_rate = true
error_rate_threshold = 0.05  # 5%
alert_on_high_response_time = true
response_time_threshold = 2000  # milliseconds
alert_on_low_memory = true
memory_threshold = 0.15  # 15% available
alert_on_high_cpu = true
cpu_threshold = 0.80  # 80% usage
alert_on_database_issues = true
alert_on_service_down = true
cooldown_period = 300  # seconds

# Export Configuration - Production
[metrics.export]
enabled = true
formats = ["prometheus"]
file_export = false
export_dir = "/var/lib/rustelo/metrics"
export_interval = 60  # seconds
compress_exports = true
retention_days = 7

# Grafana Integration - Production
[metrics.grafana]
enabled = true
url = "${GRAFANA_URL}"
dashboard_enabled = true
auto_create_dashboards = false
api_key = "${GRAFANA_API_KEY}"
organization_id = "${GRAFANA_ORG_ID}"
folder_name = "Rustelo"
datasource_name = "Prometheus"

# StatsD Configuration - Production
[metrics.statsd]
enabled = false
host = "${STATSD_HOST}"
port = 8125
prefix = "rustelo.prod"
tags_enabled = true
flush_interval = 10  # seconds
max_packet_size = 1400

# Datadog Integration - Production
[metrics.datadog]
enabled = false
api_key = "${DATADOG_API_KEY}"
app_key = "${DATADOG_APP_KEY}"
site = "datadoghq.com"  # or "datadoghq.eu"
service_name = "rustelo"
environment = "production"
tags = ["service:rustelo", "env:production"]

# New Relic Integration - Production
[metrics.newrelic]
enabled = false
license_key = "${NEWRELIC_LICENSE_KEY}"
app_name = "Rustelo Production"
log_level = "info"
audit_log_enabled = true

# Logging Configuration - Production
[metrics.logging]
enabled = true
log_level = "info"
log_file = "/var/log/rustelo/metrics.log"
log_to_console = false
log_slow_metrics = true
log_collection_errors = true
log_rotation = "daily"
log_retention_days = 30

# Compliance Settings - Production
[metrics.compliance]
gdpr_compliant = true
anonymize_user_data = true
data_retention_days = 90
audit_log_enabled = true
export_enabled = true
right_to_be_forgotten = true

# Health Checks - Production
[metrics.health]
enabled = true
endpoint = "/metrics/health"
check_collectors = true
check_exporters = true
check_storage = true
timeout = 5000  # milliseconds
failure_threshold = 3

# Development Settings - Disabled for production
[metrics.development]
debug_mode = false
verbose_logging = false
collect_debug_metrics = false
expose_internal_metrics = false
enable_metric_explorer = false
mock_external_metrics = false