# Metrics Feature Configuration - Production Environment
# Settings optimized for production monitoring and performance

[features]
metrics = true

# Metrics Configuration - Production
[metrics]
enabled = true
endpoint = "/metrics"
port = 0 # Use main server port
host = "127.0.0.1"
format = "prometheus"
collection_interval = 15 # seconds
retention_period = 86400 # 24 hours
buffer_size = 10000
flush_interval = 30 # seconds

# Prometheus Configuration - Production
# Exposition settings and server/pushgateway integration share this single
# table: TOML does not allow [metrics.prometheus] to be declared twice.
[metrics.prometheus]
enabled = true
endpoint = "/metrics"
include_default_metrics = true
include_process_metrics = true
include_runtime_metrics = true
histogram_buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
enable_exemplars = true
exemplar_sample_rate = 0.01
# Prometheus Integration - Production
# NOTE(review): TOML has no interpolation, so every "${NAME}" value in this
# file is a literal string to the parser. Presumably the config loader expands
# them from the environment - confirm before deploying.
server_url = "${PROMETHEUS_URL}"
pushgateway_url = "${PROMETHEUS_PUSHGATEWAY_URL}"
job_name = "rustelo"
instance_name = "${HOSTNAME}"
push_interval = 30 # seconds
basic_auth_username = "${PROMETHEUS_USERNAME}"
basic_auth_password = "${PROMETHEUS_PASSWORD}"

# System Metrics - Production
[metrics.system]
enabled = true
collect_cpu = true
collect_memory = true
collect_disk = true
collect_network = true
collect_load = true
collect_processes = false # Disabled for performance
collection_interval = 30 # seconds
cpu_percent_precision = 2
memory_usage_threshold = 0.85 # Alert threshold

# HTTP Metrics - Production
[metrics.http]
enabled = true
track_requests = true
track_response_times = true
track_status_codes = true
track_request_size = true
track_response_size = true
track_user_agents = false # Disabled for privacy
track_ip_addresses = false # Disabled for privacy
histogram_buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
# NOTE(review): the health endpoint "/metrics/health" is not listed here;
# confirm whether path matching is exact or prefix-based.
exclude_paths = ["/health", "/metrics", "/favicon.ico"]

# Database Metrics - Production
[metrics.database]
enabled = true
track_queries = true
track_query_duration = true
track_connection_pool = true
track_slow_queries = true
slow_query_threshold = 500 # milliseconds
track_query_types = true
log_queries = false # Disabled for performance
track_transactions = true
track_locks = true

# Application Metrics - Production
[metrics.application]
enabled = true
track_business_metrics = true
track_custom_counters = true
track_custom_gauges = true
track_custom_histograms = true
track_feature_usage = true
track_errors = true
track_warnings = false # Disabled for noise reduction
track_user_actions = true
track_api_usage = true

# Performance Metrics - Production
# Tracking switches and collector tuning share this single table: TOML does
# not allow [metrics.performance] to be declared twice.
[metrics.performance]
enabled = true
track_memory_usage = true
track_cpu_usage = true
track_gc_metrics = true
track_thread_metrics = true
track_async_metrics = true
profile_slow_operations = true
profile_threshold = 100 # milliseconds
enable_profiling = false # Disabled for performance
# Performance Settings - Production
async_collection = true
buffer_size = 10000
batch_size = 1000
collection_timeout = 5000 # milliseconds
max_memory_usage = 268_435_456 # 256MB
sampling_rate = 1.0 # 100% sampling
adaptive_sampling = true
high_cardinality_limit = 10000

# Cache Metrics - Production
[metrics.cache]
enabled = true
track_hit_rate = true
track_miss_rate = true
track_eviction_rate = true
track_memory_usage = true
track_operation_times = true
track_key_distribution = false
track_size_distribution = true

# Security Metrics - Production
# Event tracking and endpoint access control share this single table: TOML
# does not allow [metrics.security] to be declared twice.
[metrics.security]
enabled = true
track_failed_logins = true
track_blocked_requests = true
track_rate_limit_hits = true
track_csrf_failures = true
track_auth_events = true
log_security_events = false # Separate security logging
track_suspicious_activity = true
track_intrusion_attempts = true
# Security Settings - Production
require_authentication = true
allowed_ips = ["127.0.0.1", "::1"] # Localhost only
api_key_required = true
api_key_header = "X-Metrics-API-Key"
rate_limit_enabled = true
rate_limit_requests = 100
rate_limit_window = 60 # seconds

# Custom Metrics - Production
[metrics.custom]
enabled = true
user_registrations = true
user_logins = true
content_views = true
api_calls = true
feature_toggles = true
error_rates = true
conversion_rates = true
business_kpis = true

# Alerting - Production
[metrics.alerting]
enabled = true
webhook_url = "${ALERT_WEBHOOK_URL}"
alert_on_high_error_rate = true
error_rate_threshold = 0.05 # 5%
alert_on_high_response_time = true
response_time_threshold = 2000 # milliseconds
alert_on_low_memory = true
memory_threshold = 0.15 # 15% available
alert_on_high_cpu = true
cpu_threshold = 0.80 # 80% usage
alert_on_database_issues = true
alert_on_service_down = true
cooldown_period = 300 # seconds

# Export Configuration - Production
[metrics.export]
enabled = true
formats = ["prometheus"]
file_export = false
export_dir = "/var/lib/rustelo/metrics"
export_interval = 60 # seconds
compress_exports = true
retention_days = 7

# Grafana Integration - Production
[metrics.grafana]
enabled = true
url = "${GRAFANA_URL}"
dashboard_enabled = true
auto_create_dashboards = false
api_key = "${GRAFANA_API_KEY}"
# NOTE(review): this is a TOML string even after substitution; confirm the
# loader accepts a string here if it expects a numeric organization id.
organization_id = "${GRAFANA_ORG_ID}"
folder_name = "Rustelo"
datasource_name = "Prometheus"

# StatsD Configuration - Production
[metrics.statsd]
enabled = false
host = "${STATSD_HOST}"
port = 8125
prefix = "rustelo.prod"
tags_enabled = true
flush_interval = 10 # seconds
max_packet_size = 1400

# Datadog Integration - Production
[metrics.datadog]
enabled = false
api_key = "${DATADOG_API_KEY}"
app_key = "${DATADOG_APP_KEY}"
site = "datadoghq.com" # or "datadoghq.eu"
service_name = "rustelo"
environment = "production"
tags = ["service:rustelo", "env:production"]

# New Relic Integration - Production
[metrics.newrelic]
enabled = false
license_key = "${NEWRELIC_LICENSE_KEY}"
app_name = "Rustelo Production"
log_level = "info"
audit_log_enabled = true

# Logging Configuration - Production
[metrics.logging]
enabled = true
log_level = "info"
log_file = "/var/log/rustelo/metrics.log"
log_to_console = false
log_slow_metrics = true
log_collection_errors = true
log_rotation = "daily"
log_retention_days = 30

# Compliance Settings - Production
[metrics.compliance]
gdpr_compliant = true
anonymize_user_data = true
data_retention_days = 90
audit_log_enabled = true
export_enabled = true
right_to_be_forgotten = true

# Health Checks - Production
[metrics.health]
enabled = true
endpoint = "/metrics/health"
check_collectors = true
check_exporters = true
check_storage = true
timeout = 5000 # milliseconds
failure_threshold = 3

# Development Settings - Disabled for production
[metrics.development]
debug_mode = false
verbose_logging = false
collect_debug_metrics = false
expose_internal_metrics = false
enable_metric_explorer = false
mock_external_metrics = false