Jesús Pérex 515c9343f4
Some checks failed
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Security Audit (push) Has been cancelled
CI/CD Pipeline / Build Docker Image (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Performance Benchmarks (push) Has been cancelled
CI/CD Pipeline / Cleanup (push) Has been cancelled
chore: add config path
2025-07-07 23:13:01 +01:00

265 lines
15 KiB
TOML

# Metrics Feature Configuration - Example Environment
# Complete documentation of all metrics and monitoring options
# Feature switches; this file covers the metrics feature.
[features]
metrics = true

# Core metrics settings: endpoint, naming, intervals, and buffering.
[metrics]
enabled = true
endpoint = "/metrics"  # path of the metrics endpoint
format = "prometheus"  # one of: prometheus, json, influxdb
namespace = "rustelo"  # metrics namespace
subsystem = "app"  # metrics subsystem
collect_interval = 15  # seconds between collections
export_interval = 30  # seconds between exports
retention_days = 30  # days that metrics are retained
buffer_size = 10_000  # size of the metrics buffer
flush_interval = 5  # seconds between buffer flushes
compression = true  # compress metrics
# Whether the metrics endpoint requires authentication.
# NOTE(review): [metrics.security] sets authentication_required = true, which
# contradicts this value; confirm which setting the application honors.
authentication_required = false

# Prometheus settings: port, path, registry, labels, histogram and summary tuning.
[metrics.prometheus]
enabled = true
port = 9090  # Prometheus server port
path = "/metrics"  # Prometheus metrics path
registry = "default"  # registry name
# Labels applied to all metrics by default.
default_labels = { service = "rustelo", version = "1.0.0", environment = "production" }
# Default histogram buckets.
histogram_buckets = [
    0.005, 0.01, 0.025, 0.05, 0.1, 0.25,
    0.5, 1.0, 2.5, 5.0, 10.0,
]
# Summary quantiles, each paired with its error value.
summary_objectives = [
    { quantile = 0.5, error = 0.05 },
    { quantile = 0.9, error = 0.01 },
    { quantile = 0.99, error = 0.001 },
]
# Maximum age for summary metrics (unit not stated here; presumably seconds, TODO confirm).
max_age = 600
age_buckets = 5  # number of age buckets used by summaries

# InfluxDB settings (disabled in this example).
[metrics.influxdb]
enabled = false
host = "localhost"
port = 8086
database = "rustelo_metrics"  # database name
username = "admin"
# Placeholder value; the real password is meant to come from an environment
# variable. TOML itself performs no env-var expansion, so that override has to
# happen outside this file.
password = "password"
measurement = "application_metrics"  # measurement name
retention_policy = "autogen"  # InfluxDB retention policy
precision = "s"  # timestamp precision: s, ms, us, ns
timeout = 10  # connection timeout, in seconds
max_retries = 3  # maximum retry attempts
batch_size = 1000  # points per write batch
flush_interval = 10  # seconds between flushes

# StatsD settings (disabled in this example).
[metrics.statsd]
enabled = false
host = "localhost"
port = 8125
prefix = "rustelo"  # prefix for metrics
tags_enabled = true  # turn on tag support
tag_format = "datadog"  # one of: datadog, influxdb
buffer_size = 1000  # size of the metrics buffer
flush_interval = 5  # seconds between flushes
connection_timeout = 5  # seconds

# System metrics: one switch per monitored resource.
[metrics.system]
enabled = true
cpu_usage = true  # CPU usage
memory_usage = true  # memory usage
disk_usage = true  # disk usage
disk_io = true  # disk I/O
network_usage = true  # network usage
network_io = true  # network I/O
load_average = true  # load average
open_files = true  # open file descriptors
processes = true  # process count
uptime = true  # system uptime
# Collection interval for system metrics (unit not stated here; presumably
# seconds, TODO confirm).
collection_interval = 30

# Application metrics: one switch per metric family.
[metrics.application]
enabled = true
request_metrics = true  # HTTP requests
response_metrics = true  # HTTP responses
database_metrics = true  # database queries
cache_metrics = true  # cache hits and misses
error_metrics = true  # error rate
performance_metrics = true  # performance
memory_metrics = true  # application memory
gc_metrics = true  # garbage collection
thread_metrics = true  # thread pool
connection_metrics = true  # connection pool

# HTTP metrics: which request/response attributes are tracked.
[metrics.http]
enabled = true
track_requests = true  # HTTP requests
track_responses = true  # HTTP responses
track_duration = true  # request duration
track_size = true  # request and response size
track_status_codes = true  # HTTP status codes
track_methods = true  # HTTP methods
track_paths = true  # request paths
track_user_agents = false  # user agents
track_remote_addr = false  # remote addresses
group_paths = true  # group similar paths together
exclude_paths = ["/health", "/metrics"]  # paths left out of tracking
slow_request_threshold = 1000  # milliseconds

# Database metrics: which database activity is tracked.
[metrics.database]
enabled = true
track_queries = true  # queries
track_connections = true  # connections
track_transactions = true  # transactions
track_slow_queries = true  # slow queries
slow_query_threshold = 1000  # milliseconds
track_query_types = true  # query types (SELECT, INSERT, etc.)
track_tables = true  # table access
track_errors = true  # database errors
connection_pool_metrics = true  # connection pool
query_cache_metrics = true  # query cache

# Cache metrics: which cache operations and properties are tracked.
[metrics.cache]
enabled = true
track_hits = true  # cache hits
track_misses = true  # cache misses
track_sets = true  # cache sets
track_gets = true  # cache gets
track_deletes = true  # cache deletes
track_evictions = true  # cache evictions
track_size = true  # cache size
track_memory_usage = true  # cache memory usage
track_ttl = true  # TTL metrics
per_key_metrics = false  # per-key metrics (high cardinality)

# Business metrics: which user and product events are tracked.
[metrics.business]
enabled = true
user_registrations = true  # user registrations
user_logins = true  # user logins
user_activity = true  # user activity
content_created = true  # content creation
content_views = true  # content views
api_usage = true  # API usage
feature_usage = true  # feature usage
conversion_metrics = true  # conversions
revenue_metrics = false  # revenue
custom_events = true  # custom events

# Alerting: notification channels followed by the alert rules.
[metrics.alerts]
enabled = true
alert_manager_url = "http://localhost:9093"  # AlertManager URL
# Webhook URL for alerts. The trailing "xxx" looks like a placeholder; replace
# it with a real hook before relying on webhook delivery.
webhook_url = "https://hooks.slack.com/services/xxx"
email_notifications = true  # email channel
slack_notifications = true  # Slack channel
pagerduty_notifications = false  # PagerDuty channel

# Alert rules, written one table per rule. This parses to the same
# `alert_rules` array as a list of inline tables would.
[[metrics.alerts.alert_rules]]
name = "high_error_rate"
condition = "error_rate > 5.0"
duration = "5m"
severity = "warning"
description = "High error rate detected"

[[metrics.alerts.alert_rules]]
name = "high_response_time"
condition = "response_time_p95 > 2000"
duration = "5m"
severity = "warning"
description = "High response time detected"

[[metrics.alerts.alert_rules]]
name = "low_disk_space"
condition = "disk_usage > 90"
duration = "5m"
severity = "critical"
description = "Low disk space"

# Dashboard integration: Grafana connection and dashboard definitions.
[metrics.dashboards]
enabled = true
grafana_enabled = true  # Grafana integration
grafana_url = "http://localhost:3000"
# Placeholder value; the real API key is meant to come from an environment
# variable. TOML itself performs no env-var expansion, so that override has to
# happen outside this file.
grafana_api_key = "your-api-key"
auto_create_dashboards = true  # create dashboards automatically
dashboard_templates = true  # use dashboard templates
default_dashboard = "rustelo-overview"  # name of the default dashboard
# Custom dashboards, each listing its panels by name.
custom_dashboards = [
    { name = "application-overview", panels = ["cpu", "memory", "requests", "errors"] },
    { name = "database-performance", panels = ["query_time", "connections", "slow_queries"] },
    { name = "diagnostic-dashboard", panels = ["error_rates", "latency", "throughput", "system_health"] },
]

# Exporters: which export formats are on, and how export files are handled.
[metrics.exporters]
enabled = true
prometheus_exporter = true  # Prometheus
influxdb_exporter = false  # InfluxDB
statsd_exporter = false  # StatsD
json_exporter = true  # JSON
csv_exporter = false  # CSV
export_directory = "exports"  # directory that receives exported metrics
export_schedule = "0 0 * * *"  # daily at midnight
export_retention = 30  # days export files are kept
export_compression = true  # compress exported files

# Sampling: base rate, cardinality limit, and per-pattern strategies.
[metrics.sampling]
enabled = true
sample_rate = 1.0  # sample rate, from 0.0 to 1.0
high_cardinality_limit = 10_000  # high-cardinality limit
adaptive_sampling = true  # turn on adaptive sampling
# Sampling strategies, each keyed by a metric name pattern.
sampling_strategies = [
    { metric_pattern = "http_requests_*", sample_rate = 0.1, max_cardinality = 1000 },
    { metric_pattern = "database_queries_*", sample_rate = 0.05, max_cardinality = 500 },
]

# Security for metrics access: authentication, authorization, TLS, rate limits.
[metrics.security]
enabled = true
# NOTE(review): [metrics] sets authentication_required = false, which
# contradicts this value; confirm which setting the application honors.
authentication_required = true
authorization_enabled = true  # turn on authorization
allowed_roles = ["admin", "operator"]  # roles permitted to access metrics
tls_enabled = true  # TLS on the metrics endpoint
client_cert_required = false  # whether client certificates are required
rate_limiting = true  # turn on rate limiting
rate_limit_requests = 100  # requests per minute
ip_whitelist = ["127.0.0.1", "::1"]  # addresses permitted to access metrics
audit_logging = true  # turn on audit logging

# Performance optimizations: one switch per optimization.
[metrics.performance]
enabled = true
async_collection = true  # asynchronous metrics collection
batch_processing = true  # batch processing
memory_optimization = true  # memory optimization
compression = true  # compression
lazy_initialization = true  # lazy initialization
connection_pooling = true  # connection pooling
cache_metrics = true  # cache computed metrics
parallel_processing = true  # parallel processing
buffer_optimization = true  # optimized buffer usage
gc_optimization = true  # optimized garbage collection

# Debugging aids, intended for development only; everything is off here.
[metrics.debugging]
enabled = false
verbose_logging = false  # verbose logging
debug_endpoint = "/debug/metrics"  # path of the debug endpoint
dump_metrics = false  # dump metrics to a file
trace_collection = false  # trace metrics collection
profiling_enabled = false  # profiling
memory_profiling = false  # memory profiling
cpu_profiling = false  # CPU profiling
debug_labels = false  # add debug labels to metrics
validation_enabled = false  # metrics validation

# Metrics storage: backend, retention, backups, and cleanup.
[metrics.storage]
enabled = true
storage_backend = "prometheus"  # one of: prometheus, influxdb, file
storage_path = "metrics_data"  # path used by the file backend
retention_policy = "30d"  # retention policy
compression_enabled = true  # compress stored data
backup_enabled = true  # back up metrics
backup_schedule = "0 2 * * *"  # daily at 2 AM
backup_retention = 7  # days backups are kept
cleanup_enabled = true  # automatic cleanup
cleanup_schedule = "0 3 * * *"  # daily at 3 AM

# External integrations; every vendor integration is off in this example.
[metrics.integrations]
enabled = true
datadog_enabled = false  # Datadog
newrelic_enabled = false  # New Relic
dynatrace_enabled = false  # Dynatrace
splunk_enabled = false  # Splunk
elastic_enabled = false  # Elasticsearch
custom_integrations = []  # custom integration configurations
webhook_integrations = []  # webhook integration configurations
api_integrations = []  # API integration configurations

# Compliance features: privacy, retention, and audit switches.
[metrics.compliance]
enabled = true
gdpr_compliance = true  # GDPR compliance
data_anonymization = true  # data anonymization
pii_scrubbing = true  # scrub PII from metrics
audit_trail = true  # maintain an audit trail
data_retention_policy = true  # enforce the data retention policy
consent_tracking = false  # track consent for metrics
right_to_deletion = true  # support the right to deletion
data_export = true  # support data export
privacy_controls = true  # privacy controls