263 lines
6.6 KiB
TOML
Raw Normal View History

2025-07-07 23:13:01 +01:00
# Metrics Feature Configuration - Production Environment
# Settings optimized for production monitoring and performance
#
# Feature switch: turns the metrics feature on for this environment.
# NOTE(review): how this flag relates to `enabled` under [metrics] is not
# visible in this file - confirm against the config loader.
[features]
metrics = true
# Core metrics settings - Production
[metrics]
enabled = true
format = "prometheus"
# Where the metrics are served
endpoint = "/metrics"
host = "127.0.0.1"
# 0 = use the main server port
port = 0
# Timing values below are in seconds
collection_interval = 15
flush_interval = 30
# 24 hours
retention_period = 86_400
buffer_size = 10_000
# Prometheus Configuration - Production
# Exposition settings and server/Pushgateway integration are kept in ONE
# table: TOML rejects a document that declares the same table header twice.
[metrics.prometheus]
enabled = true
endpoint = "/metrics"
include_default_metrics = true
include_process_metrics = true
include_runtime_metrics = true
histogram_buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
enable_exemplars = true
exemplar_sample_rate = 0.01
# Prometheus Integration - Production (server / Pushgateway)
# NOTE(review): TOML does not expand "${VAR}" itself; these placeholders are
# presumably substituted by the config loader - confirm.
server_url = "${PROMETHEUS_URL}"
pushgateway_url = "${PROMETHEUS_PUSHGATEWAY_URL}"
job_name = "rustelo"
instance_name = "${HOSTNAME}"
push_interval = 30 # seconds
basic_auth_username = "${PROMETHEUS_USERNAME}"
basic_auth_password = "${PROMETHEUS_PASSWORD}"
# System Metrics - Production
[metrics.system]
enabled = true
collect_cpu = true
collect_memory = true
collect_disk = true
collect_network = true
collect_load = true
collect_processes = false # Disabled for performance
collection_interval = 30 # seconds
cpu_percent_precision = 2
memory_usage_threshold = 0.85 # Alert threshold
# HTTP Metrics - Production
[metrics.http]
enabled = true
track_requests = true
track_response_times = true
track_status_codes = true
track_request_size = true
track_response_size = true
track_user_agents = false # Disabled for privacy
track_ip_addresses = false # Disabled for privacy
histogram_buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
exclude_paths = ["/health", "/metrics", "/favicon.ico"]
# Database Metrics - Production
[metrics.database]
enabled = true
track_queries = true
track_query_duration = true
track_connection_pool = true
track_slow_queries = true
slow_query_threshold = 500 # milliseconds
track_query_types = true
log_queries = false # Disabled for performance
track_transactions = true
track_locks = true
# Application Metrics - Production
[metrics.application]
enabled = true
track_business_metrics = true
track_custom_counters = true
track_custom_gauges = true
track_custom_histograms = true
track_feature_usage = true
track_errors = true
track_warnings = false # Disabled for noise reduction
track_user_actions = true
track_api_usage = true
# Performance Metrics - Production
# Tracking switches and collector tuning are kept in ONE table: TOML rejects
# a document that declares the same table header twice.
[metrics.performance]
enabled = true
track_memory_usage = true
track_cpu_usage = true
track_gc_metrics = true
track_thread_metrics = true
track_async_metrics = true
profile_slow_operations = true
profile_threshold = 100 # milliseconds
enable_profiling = false # Disabled for performance
# Performance Settings - Production (collector tuning)
async_collection = true
buffer_size = 10000
batch_size = 1000
collection_timeout = 5000 # milliseconds
max_memory_usage = 268435456 # 256MB
sampling_rate = 1.0 # 100% sampling
adaptive_sampling = true
high_cardinality_limit = 10000
# Cache Metrics - Production
[metrics.cache]
enabled = true
track_hit_rate = true
track_miss_rate = true
track_eviction_rate = true
track_memory_usage = true
track_operation_times = true
track_key_distribution = false
track_size_distribution = true
# Security Metrics - Production
# Event tracking and endpoint access control are kept in ONE table: TOML
# rejects a document that declares the same table header twice.
[metrics.security]
enabled = true
track_failed_logins = true
track_blocked_requests = true
track_rate_limit_hits = true
track_csrf_failures = true
track_auth_events = true
log_security_events = false # Separate security logging
track_suspicious_activity = true
track_intrusion_attempts = true
# Security Settings - Production (access control)
require_authentication = true
allowed_ips = ["127.0.0.1", "::1"] # Localhost only
api_key_required = true
api_key_header = "X-Metrics-API-Key"
rate_limit_enabled = true
rate_limit_requests = 100
rate_limit_window = 60 # seconds
# Custom Metrics - Production
[metrics.custom]
enabled = true
user_registrations = true
user_logins = true
content_views = true
api_calls = true
feature_toggles = true
error_rates = true
conversion_rates = true
business_kpis = true
# Alerting - Production
[metrics.alerting]
enabled = true
webhook_url = "${ALERT_WEBHOOK_URL}"
alert_on_high_error_rate = true
error_rate_threshold = 0.05 # 5%
alert_on_high_response_time = true
response_time_threshold = 2000 # milliseconds
alert_on_low_memory = true
memory_threshold = 0.15 # 15% available
alert_on_high_cpu = true
cpu_threshold = 0.80 # 80% usage
alert_on_database_issues = true
alert_on_service_down = true
cooldown_period = 300 # seconds
# Export Configuration - Production
[metrics.export]
enabled = true
formats = ["prometheus"]
file_export = false
export_dir = "/var/lib/rustelo/metrics"
export_interval = 60 # seconds
compress_exports = true
retention_days = 7
# Grafana Integration - Production
[metrics.grafana]
enabled = true
url = "${GRAFANA_URL}"
dashboard_enabled = true
auto_create_dashboards = false
api_key = "${GRAFANA_API_KEY}"
organization_id = "${GRAFANA_ORG_ID}"
folder_name = "Rustelo"
datasource_name = "Prometheus"
# StatsD Configuration - Production
[metrics.statsd]
enabled = false
host = "${STATSD_HOST}"
port = 8125
prefix = "rustelo.prod"
tags_enabled = true
flush_interval = 10 # seconds
max_packet_size = 1400
# Datadog Integration - Production
[metrics.datadog]
enabled = false
api_key = "${DATADOG_API_KEY}"
app_key = "${DATADOG_APP_KEY}"
site = "datadoghq.com" # or "datadoghq.eu"
service_name = "rustelo"
environment = "production"
tags = ["service:rustelo", "env:production"]
# New Relic Integration - Production
[metrics.newrelic]
enabled = false
license_key = "${NEWRELIC_LICENSE_KEY}"
app_name = "Rustelo Production"
log_level = "info"
audit_log_enabled = true
# Logging Configuration - Production
[metrics.logging]
enabled = true
log_level = "info"
log_file = "/var/log/rustelo/metrics.log"
log_to_console = false
log_slow_metrics = true
log_collection_errors = true
log_rotation = "daily"
log_retention_days = 30
# Compliance Settings - Production
[metrics.compliance]
# Privacy / GDPR switches
gdpr_compliant = true
anonymize_user_data = true
right_to_be_forgotten = true
export_enabled = true
# Retention and auditing
data_retention_days = 90
audit_log_enabled = true
# Health Checks - Production
[metrics.health]
enabled = true
endpoint = "/metrics/health"
# Components covered by the health check
check_collectors = true
check_exporters = true
check_storage = true
# Timeout is in milliseconds
timeout = 5_000
failure_threshold = 3
# Development Settings - every switch is off in production
[metrics.development]
# Diagnostics
debug_mode = false
verbose_logging = false
collect_debug_metrics = false
# Tooling and test aids
expose_internal_metrics = false
enable_metric_explorer = false
mock_external_metrics = false