From 603be02b269ef430dcf8872ab427b51b4343e7c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jesu=CC=81s=20Pe=CC=81rez?= Date: Thu, 8 Jan 2026 21:22:57 +0000 Subject: [PATCH] chore: update docs, adr --- .markdownlint-cli2.jsonc | 4 +- .markdownlint.json | 61 - README.md | 106 +- bootstrap/install.sh | 71 +- config/ai.toml | 538 ++++++++ docs/src/SUMMARY.md | 21 + docs/src/ai/README.md | 170 +++ docs/src/architecture/adr/README.md | 12 +- .../adr/adr-013-typdialog-integration.md | 588 +++++++++ .../adr/adr-014-secretumvault-integration.md | 657 ++++++++++ .../adr-015-ai-integration-architecture.md | 1115 +++++++++++++++++ scripts/ensure-typedialog.sh | 222 ++++ scripts/install-typedialog.sh | 640 ++++++++++ scripts/setup-platform-config.sh | 13 + 14 files changed, 4136 insertions(+), 82 deletions(-) delete mode 100644 .markdownlint.json create mode 100644 config/ai.toml create mode 100644 docs/src/ai/README.md create mode 100644 docs/src/architecture/adr/adr-013-typdialog-integration.md create mode 100644 docs/src/architecture/adr/adr-014-secretumvault-integration.md create mode 100644 docs/src/architecture/adr/adr-015-ai-integration-architecture.md create mode 100755 scripts/ensure-typedialog.sh create mode 100755 scripts/install-typedialog.sh diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc index b1d73e6..90ab893 100644 --- a/.markdownlint-cli2.jsonc +++ b/.markdownlint-cli2.jsonc @@ -37,6 +37,7 @@ "MD012": false, // no-multiple-blanks (relaxed - allow formatting space) "MD024": false, // no-duplicate-heading (too strict for docs) "MD028": false, // no-blanks-blockquote (relaxed) + "MD031": false, // blanks-around-fences (too strict for technical docs) "MD047": true, // single-trailing-newline // Links and references @@ -71,7 +72,8 @@ "MD032": false, // blanks-around-lists (flexible spacing) "MD035": false, // hr-style (consistent) "MD036": false, // no-emphasis-as-heading - "MD044": false // proper-names + "MD044": false, // proper-names + "MD060": false // table-column-style (formatting preference) }, // Documentation patterns diff --git a/.markdownlint.json b/.markdownlint.json deleted file mode 100644 index e5799b4..0000000 --- a/.markdownlint.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "default": true, - "extends": "markdownlint/style/relaxed", - - "MD001": false, - "MD003": false, - "MD004": { "style": "consistent" }, - "MD005": false, - "MD007": { "indent": 2 }, - "MD009": true, - "MD010": true, - "MD011": true, - "MD012": false, - "MD013": { "line_length": 150, "code_blocks": true, "headers": true }, - "MD014": false, - "MD018": true, - "MD019": true, - "MD020": true, - "MD021": true, - "MD022": false, - "MD023": true, - "MD024": false, - "MD025": false, - "MD026": { "punctuation": ".,;:!?" 
}, - "MD027": true, - "MD028": false, - "MD029": false, - "MD030": { "ul_single": 1, "ol_single": 1, "ul_multi": 1, "ol_multi": 1 }, - "MD031": false, - "MD032": false, - "MD033": { "allowed_elements": ["br", "hr", "details", "summary", "p", "img"] }, - "MD034": true, - "MD035": false, - "MD036": false, - "MD037": true, - "MD039": true, - "MD040": true, - "MD041": false, - "MD042": true, - "MD043": false, - "MD044": false, - "MD045": true, - "MD046": { "style": "fenced" }, - "MD047": true, - "MD048": false, - "MD049": false, - "MD050": false, - "MD051": false, - "MD052": false, - "MD053": false, - "MD054": false, - "MD055": false, - "MD056": false, - "MD058": false, - "MD059": false, - "MD060": false, - "MD061": false, - "MD062": false, - "MD063": false, - "no-hard-tabs": true -} diff --git a/README.md b/README.md index 7ffc5de..161e7c8 100644 --- a/README.md +++ b/README.md @@ -465,7 +465,28 @@ Multi-mode installation system with TUI, CLI, and unattended modes. - **Deployment Modes**: Solo (2 CPU/4GB), MultiUser (4 CPU/8GB), CICD (8 CPU/16GB), Enterprise (16 CPU/32GB) - **MCP Integration**: 7 AI-powered settings tools for intelligent configuration -### 9. **Nushell Plugins Integration** (v1.0.0) +### 9. **Version Management System** (v3.6.0) + +Centralized tool and provider version management with bash-compatible export. + +- **Unified Version Source**: All versions defined in Nickel files (`versions.ncl` and provider `version.ncl`) +- **Generated Versions File**: Bash-compatible KEY="VALUE" format for shell scripts +- **Core Tools**: NUSHELL, NICKEL, SOPS, AGE, K9S with convenient aliases (NU for NUSHELL) +- **Provider Versions**: Automatically discovers and includes all provider versions (AWS, HCLOUD, UPCTL, etc.) +- **Command**: `provisioning setup versions` generates `/provisioning/core/versions` file +- **Shell Integration**: Can be sourced directly in bash scripts: `source /provisioning/core/versions && echo $NU_VERSION` +- **Usage**: + ```bash + # Generate versions file + provisioning setup versions + + # Use in bash scripts + source /provisioning/core/versions + echo "Using Nushell version: $NU_VERSION" + echo "AWS CLI version: $PROVIDER_AWS_VERSION" + ``` + +### 10. **Nushell Plugins Integration** (v1.0.0) Three native Rust plugins providing 10-50x performance improvements over HTTP API. @@ -478,7 +499,7 @@ Three native Rust plugins providing 10-50x performance improvements over HTTP AP - **KMS Backends**: RustyVault, Age, AWS KMS, Vault, Cosmian - **Graceful Fallback**: Automatic fallback to HTTP if plugins not installed -### 10. **Complete Security System** (v4.0.0) +### 11. **Complete Security System** (v4.0.0) Enterprise-grade security with 39,699 lines across 12 components. @@ -538,6 +559,25 @@ Enterprise-grade security with 39,699 lines across 12 components. 
| **JWT** | Latest | Authentication tokens | RS256 signatures, Argon2id password hashing | | **Keyring** | Latest | OS-native secure storage | macOS Keychain, Linux Secret Service, Windows Credential Manager | +### Version Management + +| Component | Purpose | Format | +|-----------|---------|--------| +| **versions.ncl** | Core tool versions (Nickel primary) | Nickel schema | +| **provider version.ncl** | Provider-specific versions | Nickel schema | +| **provisioning setup versions** | Version file generator | Nushell command | +| **versions file** | Bash-compatible exports | KEY="VALUE" format | + +**Usage**: +```bash +# Generate versions file from Nickel schemas +provisioning setup versions + +# Source in shell scripts +source /provisioning/core/versions +echo $NU_VERSION $PROVIDER_AWS_VERSION +``` + ### Optional Tools | Tool | Purpose | @@ -840,6 +880,17 @@ deploy-production: cd my-project ``` +3.5. **Generate Versions File** (Optional - for bash scripts) + + ```bash + provisioning setup versions + # Creates /provisioning/core/versions with all tool and provider versions + + # Use in your deployment scripts + source /provisioning/core/versions + echo "Deploying with Nushell $NU_VERSION and AWS CLI $PROVIDER_AWS_VERSION" + ``` + 4. **Define Infrastructure (Nickel)** ```bash @@ -965,6 +1016,13 @@ See **[docs/src/architecture/](docs/src/architecture/)** for design patterns: - Type-safe lazy evaluation - ~220 legacy files removed, ~250 new schema files added +- ✅ **v3.6.0** (2026-01-08) - Version Management System + - Centralized tool and provider version management + - Bash-compatible versions file generation + - `provisioning setup versions` command + - Automatic provider version discovery from Nickel schemas + - Shell script integration with sourcing support + - ✅ **v4.0.0** (2025-10-09) - Complete Security System (12 components, 39,699 lines) - ✅ **v3.5.0** (2025-10-07) - Platform Installer with TUI and CI/CD modes - ✅ **v3.4.0** (2025-10-06) - Test Environment Service with container management @@ -1014,6 +1072,48 @@ See [LICENSE](LICENSE) file in project root. 
--- **Maintained By**: Architecture Team -**Last Updated**: 2026-01-08 (Nickel v5.0.0 Migration) +**Last Updated**: 2026-01-08 (Version Management System v3.6.0 + Nickel v5.0.0 Migration Complete) **Current Branch**: nickel **Project Home**: [provisioning/](provisioning/) + +--- + +## Recent Changes (2026-01-08) + +### Version Management System (v3.6.0) + +**What Changed**: +- ✅ Implemented `provisioning setup versions` command +- ✅ Generates bash-compatible `/provisioning/core/versions` file +- ✅ Automatically discovers and includes all provider versions from Nickel schemas +- ✅ Fixed to remove redundant metadata (all sources are Nickel) +- ✅ Core tools with aliases: NUSHELL→NU, NICKEL, SOPS, AGE, K9S +- ✅ Shell script integration: `source /provisioning/core/versions && echo $NU_VERSION` + +**Files Modified**: +- `provisioning/core/nulib/lib_provisioning/setup/utils.nu` - Core implementation +- `provisioning/core/nulib/main_provisioning/commands/setup.nu` - Command routing +- `provisioning/core/nulib/lib_provisioning/workspace/enforcement.nu` - Workspace exemption +- `provisioning/README.md` - Documentation updates + +**Generated File Example**: +```bash +NUSHELL_VERSION="0.109.1" +NUSHELL_SOURCE="https://github.com/nushell/nushell/releases" +NU_VERSION="0.109.1" +NU_SOURCE="https://github.com/nushell/nushell/releases" + +NICKEL_VERSION="1.15.1" +NICKEL_SOURCE="https://github.com/tweag/nickel/releases" + +PROVIDER_AWS_VERSION="2.32.11" +PROVIDER_AWS_SOURCE="https://github.com/aws/aws-cli/releases" +# ... and more providers +``` + +**Key Improvements**: +- Clean metadata (no redundant `_LIB` fields - all sources are Nickel) +- Automatic provider discovery from `extensions/providers/*/nickel/version.ncl` +- Direct Nickel file parsing with JSON export +- Zero dependency on environment variables or legacy systems +- 100% bash/shell compatible for deployment scripts diff --git a/bootstrap/install.sh b/bootstrap/install.sh index cb73b23..66da6fe 100755 --- a/bootstrap/install.sh +++ b/bootstrap/install.sh @@ -131,10 +131,45 @@ echo " Rust: $(rustc --version)" echo " Nushell: $(nu --version)" # ════════════════════════════════════════════════════════════════════════ -# STAGE 3: CREATE DIRECTORY STRUCTURE +# STAGE 3: INSTALL TYPEDIALOG (Configuration Forms & Tools) # ════════════════════════════════════════════════════════════════════════ -print_stage "📁" "Stage 3: Creating Directory Structure" +print_stage "🎨" "Stage 3: Installing TypeDialog (Forms & Tools)" + +TYPEDIALOG_INSTALLER="$PROJECT_ROOT/provisioning/scripts/install-typedialog.sh" + +# Check if TypeDialog is already installed +if check_command "typedialog"; then + TYPEDIALOG_VERSION=$(typedialog --version 2>/dev/null | head -1) + print_success "TypeDialog already installed ($TYPEDIALOG_VERSION)" +else + print_warning "TypeDialog not found - installing..." + + if [[ ! 
-f "$TYPEDIALOG_INSTALLER" ]]; then + print_error "TypeDialog installer not found at $TYPEDIALOG_INSTALLER" + exit 1 + fi + + # Install TypeDialog with all components (backends + tools) + if bash "$TYPEDIALOG_INSTALLER" --components all --skip-validation; then + print_success "TypeDialog installed successfully" + + # Verify installation + if check_command "typedialog"; then + TYPEDIALOG_VERSION=$(typedialog --version 2>/dev/null | head -1) + echo " Installed: $TYPEDIALOG_VERSION" + fi + else + print_error "Failed to install TypeDialog" + exit 1 + fi +fi + +# ════════════════════════════════════════════════════════════════════════ +# STAGE 4: CREATE DIRECTORY STRUCTURE +# ════════════════════════════════════════════════════════════════════════ + +print_stage "📁" "Stage 4: Creating Directory Structure" mkdir -p "$WORKSPACE_PATH/config/generated/providers" mkdir -p "$WORKSPACE_PATH/config/generated/platform" @@ -147,10 +182,10 @@ mkdir -p "$WORKSPACE_PATH/.clusters" print_success "Directory structure created" # ════════════════════════════════════════════════════════════════════════ -# STAGE 4: VALIDATE CONFIGURATION +# STAGE 5: VALIDATE CONFIGURATION # ════════════════════════════════════════════════════════════════════════ -print_stage "⚙️ " "Stage 4: Validating Configuration" +print_stage "⚙️ " "Stage 5: Validating Configuration" CONFIG_NCL="$WORKSPACE_PATH/config/config.ncl" @@ -178,10 +213,10 @@ fi print_success "Configuration syntax valid" # ════════════════════════════════════════════════════════════════════════ -# STAGE 5: EXPORT CONFIGURATION +# STAGE 6: EXPORT CONFIGURATION # ════════════════════════════════════════════════════════════════════════ -print_stage "📤" "Stage 5: Exporting Configuration to TOML" +print_stage "📤" "Stage 6: Exporting Configuration to TOML" cd "$PROJECT_ROOT" @@ -196,10 +231,10 @@ else fi # ════════════════════════════════════════════════════════════════════════ -# STAGE 6: INITIALIZE ORCHESTRATOR +# STAGE 7: INITIALIZE ORCHESTRATOR # ════════════════════════════════════════════════════════════════════════ -print_stage "🚀" "Stage 6: Initializing Orchestrator Service" +print_stage "🚀" "Stage 7: Initializing Orchestrator Service" ORCHESTRATOR_PATH="$PROJECT_ROOT/provisioning/platform/orchestrator" @@ -219,10 +254,10 @@ else fi # ════════════════════════════════════════════════════════════════════════ -# STAGE 7: VERIFY INSTALLATION +# STAGE 8: VERIFY INSTALLATION # ════════════════════════════════════════════════════════════════════════ -print_stage "✅" "Stage 7: Verification" +print_stage "✅" "Stage 8: Verification" FILES_CHECK=( "config/generated/workspace.toml" @@ -280,19 +315,27 @@ echo "" echo "📍 Next Steps:" echo "" -echo "1. Verify configuration:" +echo "1. Verify TypeDialog is available:" +echo " typedialog --version && typedialog-web --version" +echo "" +echo "2. Configure platform services:" +echo " $PROJECT_ROOT/provisioning/scripts/setup-platform-config.sh" +echo "" +echo "3. Verify configuration:" echo " cat $WORKSPACE_PATH/config/config.ncl" echo "" -echo "2. Check orchestrator is running:" +echo "4. Check orchestrator is running:" echo " curl http://localhost:9090/health" echo "" -echo "3. Start provisioning:" +echo "5. Start provisioning:" echo " provisioning server create --infra sgoyol --name web-01" echo "" -echo "4. Monitor workflow:" +echo "6. 
Monitor workflow:" echo " provisioning workflow monitor " echo "" echo "📚 Documentation:" +echo " - TypeDialog Forms: $PROJECT_ROOT/provisioning/.typedialog/platform/README.md" +echo " - Setup Guide: $PROJECT_ROOT/provisioning/scripts/setup-platform-config.sh --help" echo " - User Guide: docs/user/TYPEDIALOG_PLATFORM_CONFIG_GUIDE.md" echo " - Architecture: ARCHITECTURE_CLARIFICATION.md" echo " - Workflow: PROVISIONING_WORKFLOW.md" diff --git a/config/ai.toml b/config/ai.toml new file mode 100644 index 0000000..a334009 --- /dev/null +++ b/config/ai.toml @@ -0,0 +1,538 @@ +# AI Integration Configuration for Provisioning Platform +# This file configures the AI system including LLM providers, RAG, MCP, and security policies. + +# ============================================================================ +# Core AI Configuration +# ============================================================================ + +[ai] +# Enable/disable AI features globally +enabled = true + +# LLM Provider Selection +# Options: "anthropic" | "openai" | "local" | "azure-openai" +provider = "anthropic" + +# Model Selection +# Anthropic: "claude-sonnet-4", "claude-opus-4", "claude-haiku-4" +# OpenAI: "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo" +# Local: "llama-3-70b", "mistral-large", "codellama-34b" +model = "claude-sonnet-4" + +# Model Temperature (0.0-1.0) +# Lower = more deterministic, Higher = more creative +temperature = 0.7 + +# Maximum tokens for responses +max_tokens = 4096 + +# Request timeout (seconds) +timeout = 60 + +# ============================================================================ +# AI Features - Fine-Grained Control +# ============================================================================ + +[ai.features] +# AI-assisted form filling (typdialog-ai) +# Real-time suggestions and field value predictions +form_assistance = true + +# Natural language configuration generation (typdialog-prov-gen) +# Convert plain English to Nickel configs +config_generation = true + +# Autonomous AI agents (typdialog-ag) +# WARNING: Agents can execute multi-step workflows +# Recommended: false for production (enable per-use-case) +autonomous_agents = false + +# AI-powered troubleshooting +# Analyze logs and suggest fixes for failed deployments +troubleshooting = true + +# Configuration optimization +# AI reviews configs and suggests improvements +optimization = true + +# Validation error explanations +# AI explains Nickel validation errors in plain language +error_explanations = true + +# ============================================================================ +# LLM Provider Configuration +# ============================================================================ + +[ai.anthropic] +# Anthropic Claude API configuration +api_key = "env:ANTHROPIC_API_KEY" # Load from environment variable +api_url = "https://api.anthropic.com/v1" +max_retries = 3 +retry_delay_ms = 1000 + +# Rate limits (per minute) +max_requests_per_minute = 50 +max_tokens_per_minute = 100000 + +[ai.openai] +# OpenAI GPT-4 API configuration +api_key = "env:OPENAI_API_KEY" +api_url = "https://api.openai.com/v1" +organization_id = "" # Optional +max_retries = 3 +retry_delay_ms = 1000 + +# Rate limits (per minute) +max_requests_per_minute = 60 +max_tokens_per_minute = 150000 + +[ai.local] +# Local LLM configuration (Ollama, LlamaCpp, vLLM) +# Use for air-gapped deployments or privacy-critical scenarios +model_path = "/opt/provisioning/models/llama-3-70b" +server_url = "http://localhost:11434" # Ollama default +context_length = 8192 
+num_gpu_layers = 40 # GPU acceleration + +# ============================================================================ +# Model Context Protocol (MCP) Server +# ============================================================================ + +[ai.mcp] +# MCP server configuration +enabled = true +server_url = "http://localhost:9000" +timeout = 30 +max_retries = 3 + +# Tool calling configuration +[ai.mcp.tools] +enabled = true + +# Available tools for LLM +# Tools provide structured actions the LLM can invoke +tools = [ + "nickel_validate", # Validate Nickel configuration + "schema_query", # Query Nickel schema information + "config_generate", # Generate configuration snippets + "cedar_check", # Check Cedar authorization policies + "deployment_status", # Query deployment status + "log_analyze", # Analyze deployment logs +] + +# ============================================================================ +# Retrieval-Augmented Generation (RAG) +# ============================================================================ + +[ai.rag] +# Enable RAG system +enabled = true + +# Vector Store Configuration +# Options: "qdrant" | "milvus" | "pgvector" | "chromadb" +vector_store = "qdrant" +vector_store_url = "http://localhost:6333" +collection_name = "provisioning-knowledge" + +# Embedding Model +# OpenAI: "text-embedding-3-large", "text-embedding-3-small" +# Local: "all-MiniLM-L6-v2", "bge-large-en-v1.5" +embedding_model = "text-embedding-3-large" +embedding_api_key = "env:OPENAI_API_KEY" # For OpenAI embeddings + +# Document Chunking +chunk_size = 512 # Characters per chunk +chunk_overlap = 50 # Overlap between chunks +max_chunks_per_query = 10 # Top-k retrieval + +# ============================================================================ +# RAG Index Configuration +# ============================================================================ + +[ai.rag.index] +# What to index for RAG retrieval + +# Index Nickel schemas (RECOMMENDED: true) +# Provides AI with schema definitions and contracts +schemas = true +schemas_path = "provisioning/schemas" + +# Index documentation (RECOMMENDED: true) +# Provides AI with user guides and best practices +docs = true +docs_path = "docs" + +# Index past deployments (RECOMMENDED: true) +# AI learns from successful deployment patterns +deployments = true +deployments_path = "workspaces" + +# Index best practices (RECOMMENDED: true) +# Inject organizational patterns and conventions +best_practices = true +best_practices_path = ".claude/patterns" + +# Index deployment logs (WARNING: Privacy concerns) +# Logs may contain sensitive data, enable only if sanitized +logs = false +logs_retention_days = 30 + +# Reindexing schedule +auto_reindex = true +reindex_interval_hours = 24 + +# ============================================================================ +# Security and Access Control +# ============================================================================ + +[ai.security] +# Cedar policy store for AI access control +cedar_policy_store = "/etc/provisioning/cedar-policies/ai" + +# AI cannot suggest secret values (CRITICAL: keep true) +# AI can suggest secret names/paths but not retrieve actual secrets +max_secret_suggestions = 0 + +# Require human approval for critical operations (CRITICAL: keep true) +# Operations requiring approval: +# - Deployments to production +# - Configuration changes affecting security +# - Secret rotation +# - Infrastructure deletion +require_human_approval = true + +# Audit all AI operations (CRITICAL: keep true) +# Log every AI 
request, response, and action +audit_all_operations = true + +# Data sanitization before sending to LLM +# Remove sensitive data from prompts +[ai.security.sanitization] +sanitize_secrets = true # Remove secret values +sanitize_pii = true # Remove personally identifiable info +sanitize_credentials = true # Remove passwords, API keys +sanitize_ip_addresses = false # Keep for troubleshooting + +# Allowed data for LLM +allowed_data = [ + "nickel_schemas", # Schema definitions (public) + "documentation", # User docs (public) + "error_messages", # Validation errors (sanitized) + "resource_names", # Infrastructure resource identifiers +] + +# Forbidden data for LLM (NEVER send to external LLM) +forbidden_data = [ + "secret_values", # Passwords, API keys, tokens + "private_keys", # SSH keys, TLS keys, encryption keys + "pii", # Email addresses, names, phone numbers + "credentials", # Authentication credentials + "session_tokens", # User session data +] + +# ============================================================================ +# Rate Limiting and Cost Control +# ============================================================================ + +[ai.rate_limiting] +# Per-user rate limits +requests_per_minute = 60 +requests_per_hour = 500 +requests_per_day = 2000 + +# Token limits (to control LLM API costs) +tokens_per_day = 1000000 # 1M tokens/day +tokens_per_month = 30000000 # 30M tokens/month + +# Cost limits (USD) +cost_limit_per_day = "100.00" +cost_limit_per_month = "2000.00" + +# Alert thresholds +cost_alert_threshold = 0.8 # Alert at 80% of limit + +# Rate limit exceeded behavior +# Options: "queue" | "reject" | "throttle" +exceed_behavior = "queue" +max_queue_size = 100 + +# ============================================================================ +# Caching +# ============================================================================ + +[ai.caching] +# Enable response caching to reduce LLM API calls +enabled = true + +# Cache TTL (time-to-live) +ttl = "1h" + +# Cache backend +# Options: "redis" | "memcached" | "in-memory" +backend = "redis" +redis_url = "redis://localhost:6379" + +# Cache key strategy +# "prompt" = Cache by exact prompt (high precision, low hit rate) +# "semantic" = Cache by semantic similarity (lower precision, high hit rate) +cache_strategy = "semantic" +semantic_similarity_threshold = 0.95 + +# Cache statistics +track_hit_rate = true +log_cache_misses = false + +# ============================================================================ +# Observability and Monitoring +# ============================================================================ + +[ai.observability] +# Logging level for AI operations +# Options: "trace" | "debug" | "info" | "warn" | "error" +log_level = "info" + +# Trace all AI requests (detailed logging) +# WARNING: Generates large log volume +trace_all_requests = true + +# Store conversation history (for debugging and learning) +store_conversations = true +conversation_retention_days = 30 + +# Metrics collection +[ai.observability.metrics] +enabled = true +export_format = "prometheus" # "prometheus" | "opentelemetry" +export_port = 9090 + +# Metrics to collect +metrics = [ + "request_count", # Total AI requests + "request_duration", # Latency histogram + "token_usage", # Input/output tokens + "cost_tracking", # USD cost per request + "cache_hit_rate", # Cache effectiveness + "validation_success_rate", # Generated config validity + "human_approval_rate", # How often humans approve AI output +] + +# Distributed tracing 
+[ai.observability.tracing] +enabled = true +jaeger_endpoint = "http://localhost:14268/api/traces" +sample_rate = 0.1 # Sample 10% of requests + +# ============================================================================ +# AI Agent Configuration (typdialog-ag) +# ============================================================================ + +[ai.agents] +# WARNING: Autonomous agents can execute multi-step workflows +# Enable with caution, only for trusted users + +# Enable AI agents globally +enabled = false + +# Maximum iterations per agent execution +# Prevents infinite loops +max_iterations = 20 + +# Agent timeout (seconds) +timeout = 300 + +# Require approval for each agent action (RECOMMENDED: true) +# If false, agent executes entire workflow autonomously +require_step_approval = true + +# Agent types +[ai.agents.types] +# Provisioning agent: End-to-end infrastructure setup +provisioning_agent = false + +# Troubleshooting agent: Diagnose and fix deployment issues +troubleshooting_agent = true + +# Optimization agent: Analyze and improve configurations +optimization_agent = true + +# Security audit agent: Review configs for vulnerabilities +security_audit_agent = true + +# ============================================================================ +# Configuration Generation (typdialog-prov-gen) +# ============================================================================ + +[ai.config_generation] +# Default schema for generated configs +default_schema = "workspace" + +# Validation mode +# "strict" = Reject any invalid config +# "permissive" = Allow configs with warnings +validation_mode = "strict" + +# Best practice injection +# Automatically add security/performance best practices +inject_best_practices = true + +# Template usage +# Use pre-defined templates as starting points +use_templates = true +template_directory = "provisioning/templates" + +# ============================================================================ +# Form Assistance (typdialog-ai) +# ============================================================================ + +[ai.form_assistance] +# Real-time suggestions as user types +real_time_suggestions = true + +# Minimum characters before triggering suggestions +min_chars_for_suggestions = 3 + +# Maximum suggestions per field +max_suggestions = 5 + +# Suggestion confidence threshold (0.0-1.0) +# Only show suggestions with confidence above threshold +confidence_threshold = 0.7 + +# Natural language form filling +# User can describe entire form in plain English +nl_form_filling = true + +# ============================================================================ +# Environment-Specific Overrides +# ============================================================================ + +# Development environment +[ai.environments.dev] +enabled = true +provider = "openai" # Cheaper for dev +model = "gpt-4-turbo" +require_human_approval = false # Faster iteration +cost_limit_per_day = "10.00" + +# Staging environment +[ai.environments.staging] +enabled = true +provider = "anthropic" +model = "claude-sonnet-4" +require_human_approval = true +cost_limit_per_day = "50.00" + +# Production environment +[ai.environments.production] +enabled = true +provider = "anthropic" +model = "claude-sonnet-4" +require_human_approval = true # ALWAYS true for production +autonomous_agents = false # NEVER enable in production +cost_limit_per_day = "100.00" + +# ============================================================================ +# Integration with Other Services +# 
============================================================================ + +[ai.integration] +# Orchestrator integration +orchestrator_url = "https://orchestrator.example.com" +orchestrator_api_key = "env:ORCHESTRATOR_API_KEY" + +# SecretumVault integration (for secret name suggestions only) +secretum_vault_url = "https://vault.example.com:8200" +secretum_vault_token = "env:VAULT_TOKEN" +# AI can query secret names/paths but NEVER values + +# Typdialog Web UI integration +typdialog_url = "https://forms.provisioning.example.com" +typdialog_websocket_enabled = true + +# ============================================================================ +# Advanced Settings +# ============================================================================ + +[ai.advanced] +# Prompt engineering +system_prompt_template = "provisioning/ai/prompts/system.txt" +user_prompt_template = "provisioning/ai/prompts/user.txt" + +# Context window management +max_context_tokens = 100000 # Claude Sonnet 4 context window +context_truncation_strategy = "sliding_window" # "sliding_window" | "summarize" + +# Streaming responses +enable_streaming = true +stream_chunk_size = 100 # Characters per chunk + +# Concurrent requests +max_concurrent_requests = 10 + +# ============================================================================ +# Experimental Features (Use at Your Own Risk) +# ============================================================================ + +[ai.experimental] +# Multi-agent collaboration +# Multiple AI agents work together on complex tasks +multi_agent_collaboration = false + +# Reinforcement learning from human feedback (RLHF) +# Learn from user corrections to improve over time +rlhf_enabled = false + +# Fine-tuning on deployment history +# Train custom models on organization-specific patterns +fine_tuning = false +fine_tuning_dataset_path = "provisioning/ai/fine-tuning-data" + +# ============================================================================ +# Compliance and Legal +# ============================================================================ + +[ai.compliance] +# Data residency requirements +# Ensure LLM provider complies with data residency laws +data_residency = "us" # "us" | "eu" | "local" + +# GDPR compliance mode +gdpr_mode = false +gdpr_data_retention_days = 90 + +# SOC 2 compliance logging +soc2_logging = false + +# Terms of service acceptance +# Must explicitly accept LLM provider TOS +tos_accepted = false +tos_version = "2025-01-08" + +# IMPORTANT NOTES: +# +# 1. API Keys: NEVER hardcode API keys. Always use environment variables. +# Example: api_key = "env:ANTHROPIC_API_KEY" +# +# 2. Security: Keep require_human_approval = true for production. +# AI-generated configs must be reviewed by humans. +# +# 3. Costs: Monitor LLM API usage. Set appropriate cost_limit_per_day. +# Default limits are conservative but may need adjustment. +# +# 4. Privacy: For sensitive workloads, use local models (no external API calls). +# Set provider = "local" and configure local model path. +# +# 5. RAG Index: Regularly reindex to keep AI knowledge up-to-date. +# Set auto_reindex = true and adjust reindex_interval_hours. +# +# 6. Cedar Policies: Define fine-grained AI access control in Cedar. +# Location: /etc/provisioning/cedar-policies/ai +# +# 7. Audit Logs: AI operations are security-critical. Keep audit_all_operations = true. +# Logs stored in: /var/log/provisioning/ai-audit.log +# +# 8. Agents: Autonomous agents are powerful but risky. 
+# Enable only for specific use cases, never globally in production. + +# Version: 1.0 +# Last Updated: 2025-01-08 diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index f56db4c..594bd9d 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -53,6 +53,27 @@ - [ADR-010: Configuration Format Strategy](architecture/adr/ADR-010-configuration-format-strategy.md) - [ADR-011: Nickel Migration](architecture/adr/ADR-011-nickel-migration.md) - [ADR-012: Nushell Nickel Plugin CLI Wrapper](architecture/adr/adr-012-nushell-nickel-plugin-cli-wrapper.md) +- [ADR-013: Typdialog Web UI Backend Integration](architecture/adr/adr-013-typdialog-integration.md) +- [ADR-014: SecretumVault Integration](architecture/adr/adr-014-secretumvault-integration.md) +- [ADR-015: AI Integration Architecture](architecture/adr/adr-015-ai-integration-architecture.md) + +--- + +## AI Integration + +- [Overview](ai/README.md) +- [Architecture](ai/architecture.md) +- [Natural Language Configuration](ai/natural-language-config.md) +- [AI-Assisted Forms](ai/ai-assisted-forms.md) +- [AI Agents](ai/ai-agents.md) +- [Configuration Generation](ai/config-generation.md) +- [RAG System](ai/rag-system.md) +- [MCP Integration](ai/mcp-integration.md) +- [Security Policies](ai/security-policies.md) +- [Troubleshooting with AI](ai/troubleshooting-with-ai.md) +- [API Reference](ai/api-reference.md) +- [Configuration](ai/configuration.md) +- [Cost Management](ai/cost-management.md) --- diff --git a/docs/src/ai/README.md b/docs/src/ai/README.md new file mode 100644 index 0000000..e6ac0cc --- /dev/null +++ b/docs/src/ai/README.md @@ -0,0 +1,170 @@ +# AI Integration - Intelligent Infrastructure Provisioning + +The provisioning platform integrates AI capabilities to provide intelligent assistance for infrastructure configuration, deployment, and troubleshooting. +This section documents the AI system architecture, features, and usage patterns. + +## Overview + +The AI integration consists of multiple components working together to provide intelligent infrastructure provisioning: + +- **typdialog-ai**: AI-assisted form filling and configuration +- **typdialog-ag**: Autonomous AI agents for complex workflows +- **typdialog-prov-gen**: Natural language to Nickel configuration generation +- **ai-service**: Core AI service backend with multi-provider support +- **mcp-server**: Model Context Protocol server for LLM integration +- **rag**: Retrieval-Augmented Generation for contextual knowledge + +## Key Features + +### Natural Language Configuration + +Generate infrastructure configurations from plain English descriptions: +```bash +provisioning ai generate "Create a production PostgreSQL cluster with encryption and daily backups" +``` + +### AI-Assisted Forms + +Real-time suggestions and explanations as you fill out configuration forms via typdialog web UI. 
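+
+For example, the same assisted flow can be launched from the CLI using the `--ai-assist` flag shown in the Quick Start below:
+
+```bash
+# Open the typdialog web form with AI assistance enabled
+provisioning workspace init --interactive --ai-assist
+```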
+
+### Intelligent Troubleshooting
+
+AI analyzes deployment failures and suggests fixes:
+```bash
+provisioning ai troubleshoot deployment-12345
+```
+
+### Configuration Optimization
+
+AI reviews configurations and suggests performance and security improvements:
+```bash
+provisioning ai optimize workspaces/prod/config.ncl
+```
+
+### Autonomous Agents
+
+AI agents execute multi-step workflows with minimal human intervention:
+```bash
+provisioning ai agent --goal "Set up complete dev environment for Python app"
+```
+
+## Documentation Structure
+
+- [Architecture](architecture.md) - AI system architecture and components
+- [Natural Language Config](natural-language-config.md) - NL to Nickel generation
+- [AI-Assisted Forms](ai-assisted-forms.md) - typdialog-ai integration
+- [AI Agents](ai-agents.md) - typdialog-ag autonomous agents
+- [Config Generation](config-generation.md) - typdialog-prov-gen details
+- [RAG System](rag-system.md) - Retrieval-Augmented Generation
+- [MCP Integration](mcp-integration.md) - Model Context Protocol
+- [Security Policies](security-policies.md) - Cedar policies for AI
+- [Troubleshooting with AI](troubleshooting-with-ai.md) - AI debugging workflows
+- [API Reference](api-reference.md) - AI service API documentation
+- [Configuration](configuration.md) - AI system configuration guide
+- [Cost Management](cost-management.md) - Managing LLM API costs
+
+## Quick Start
+
+### Enable AI Features
+
+```bash
+# Edit provisioning config
+vim provisioning/config/ai.toml
+
+# Set provider and enable features
+[ai]
+enabled = true
+provider = "anthropic"  # or "openai" or "local"
+model = "claude-sonnet-4"
+
+[ai.features]
+form_assistance = true
+config_generation = true
+troubleshooting = true
+```
+
+### Generate Configuration from Natural Language
+
+```bash
+# Simple generation
+provisioning ai generate "PostgreSQL database with encryption"
+
+# With specific schema
+provisioning ai generate \
+  --schema database \
+  --output workspaces/dev/db.ncl \
+  "Production PostgreSQL with 100GB storage and daily backups"
+```
+
+### Use AI-Assisted Forms
+
+```bash
+# Open typdialog web UI with AI assistance
+provisioning workspace init --interactive --ai-assist
+
+# AI provides real-time suggestions as you type
+# AI explains validation errors in plain English
+# AI fills multiple fields from natural language description
+```
+
+### Troubleshoot with AI
+
+```bash
+# Analyze failed deployment
+provisioning ai troubleshoot deployment-12345
+
+# AI analyzes logs and suggests fixes
+# AI generates corrected configuration
+# AI explains root cause in plain language
+```
+
+## Security and Privacy
+
+The AI system implements strict security controls:
+
+- ✅ **Cedar Policies**: AI access controlled by Cedar authorization
+- ✅ **Secret Isolation**: AI cannot access secrets directly
+- ✅ **Human Approval**: Critical operations require human approval
+- ✅ **Audit Trail**: All AI operations logged
+- ✅ **Data Sanitization**: Secrets/PII sanitized before sending to LLM
+- ✅ **Local Models**: Support for air-gapped deployments
+
+See [Security Policies](security-policies.md) for complete details.
+
+## Supported LLM Providers
+
+| Provider | Models | Best For |
+|----------|--------|----------|
+| **Anthropic** | Claude Sonnet 4, Claude Opus 4 | Complex configs, long context |
+| **OpenAI** | GPT-4 Turbo, GPT-4 | Fast suggestions, tool calling |
+| **Local** | Llama 3, Mistral | Air-gapped, privacy-critical |
+
+## Cost Considerations
+
+AI features incur LLM API costs. 
The system implements cost controls: + +- **Caching**: Reduces API calls by 50-80% +- **Rate Limiting**: Prevents runaway costs +- **Budget Limits**: Daily/monthly cost caps +- **Local Models**: Zero marginal cost for air-gapped deployments + +See [Cost Management](cost-management.md) for optimization strategies. + +## Architecture Decision Record + +The AI integration is documented in: +- [ADR-015: AI Integration Architecture](../architecture/adr/adr-015-ai-integration-architecture.md) + +## Next Steps + +1. Read [Architecture](architecture.md) to understand AI system design +2. Configure AI features in [Configuration](configuration.md) +3. Try [Natural Language Config](natural-language-config.md) for your first AI-generated config +4. Explore [AI Agents](ai-agents.md) for automation workflows +5. Review [Security Policies](security-policies.md) to understand access controls + +--- + +**Version**: 1.0 +**Last Updated**: 2025-01-08 +**Status**: Active diff --git a/docs/src/architecture/adr/README.md b/docs/src/architecture/adr/README.md index e448132..feebd97 100644 --- a/docs/src/architecture/adr/README.md +++ b/docs/src/architecture/adr/README.md @@ -21,9 +21,15 @@ This directory contains all Architecture Decision Records for the provisioning p - **ADR-010**: [Configuration Format Strategy](adr-010-configuration-format-strategy.md) - When to use Nickel, TOML, YAML, or KCL - **ADR-011**: [Nickel Migration](adr-011-nickel-migration.md) - Migration from KCL to Nickel as primary IaC language -### Platform Services (ADR-012) +### Platform Services (ADR-012 to ADR-014) - **ADR-012**: [Nushell Nickel Plugin CLI Wrapper](adr-012-nushell-nickel-plugin-cli-wrapper.md) - Plugin architecture for Nickel integration +- **ADR-013**: [Typdialog Web UI Backend Integration](adr-013-typdialog-integration.md) - Browser-based configuration forms with multi-user collaboration +- **ADR-014**: [SecretumVault Integration](adr-014-secretumvault-integration.md) - Centralized secrets management with dynamic credentials + +### AI and Intelligence (ADR-015) + +- **ADR-015**: [AI Integration Architecture](adr-015-ai-integration-architecture.md) - Comprehensive AI system for intelligent infrastructure provisioning ## How to Use ADRs @@ -50,5 +56,5 @@ Each ADR follows this standard structure: --- -**Last Updated**: 2025-01-03 -**Total ADRs**: 12 +**Last Updated**: 2025-01-08 +**Total ADRs**: 15 diff --git a/docs/src/architecture/adr/adr-013-typdialog-integration.md b/docs/src/architecture/adr/adr-013-typdialog-integration.md new file mode 100644 index 0000000..b612339 --- /dev/null +++ b/docs/src/architecture/adr/adr-013-typdialog-integration.md @@ -0,0 +1,588 @@ +# ADR-013: Typdialog Web UI Backend Integration for Interactive Configuration + +## Status + +**Accepted** - 2025-01-08 + +## Context + +The provisioning system requires interactive user input for configuration workflows, workspace initialization, credential setup, and guided deployment scenarios. The system architecture combines Rust (performance-critical), Nushell (scripting), and Nickel (declarative configuration), creating challenges for interactive form-based input and multi-user collaboration. + +### The Interactive Configuration Problem + +**Current limitations**: + +1. **Nushell CLI**: Terminal-only interaction + - `input` command: Single-line text prompts only + - No form validation, no complex multi-field forms + - Limited to single-user, terminal-bound workflows + - User experience: Basic and error-prone + +2. 
**Nickel**: Declarative configuration language + - Cannot handle interactive prompts (by design) + - Pure evaluation model (no side effects) + - Forms must be defined statically, not interactively + - No runtime user interaction + +3. **Existing Solutions**: Inadequate for modern infrastructure provisioning + - **Shell-based prompts**: Error-prone, no validation, single-user + - **Custom web forms**: High maintenance, inconsistent UX + - **Separate admin panels**: Disconnected from IaC workflow + - **Terminal-only TUI**: Limited to SSH sessions, no collaboration + +### Use Cases Requiring Interactive Input + +1. **Workspace Initialization**: + ```nushell + # Current: Error-prone prompts + let workspace_name = input "Workspace name: " + let provider = input "Provider (aws/azure/oci): " + # No validation, no autocomplete, no guidance + ``` + +2. **Credential Setup**: + ```nushell + # Current: Insecure and basic + let api_key = input "API Key: " # Shows in terminal history + let region = input "Region: " # No validation + ``` + +3. **Configuration Wizards**: + - Database connection setup (host, port, credentials, SSL) + - Network configuration (CIDR blocks, subnets, gateways) + - Security policies (encryption, access control, audit) + +4. **Guided Deployments**: + - Multi-step infrastructure provisioning + - Service selection with dependencies + - Environment-specific overrides + +### Requirements for Interactive Input System + +- ✅ **Terminal UI widgets**: Text input, password, select, multi-select, confirm +- ✅ **Validation**: Type checking, regex patterns, custom validators +- ✅ **Security**: Password masking, sensitive data handling +- ✅ **User Experience**: Arrow key navigation, autocomplete, help text +- ✅ **Composability**: Chain multiple prompts into forms +- ✅ **Error Handling**: Clear validation errors, retry logic +- ✅ **Rust Integration**: Native Rust library (no subprocess overhead) +- ✅ **Cross-Platform**: Works on Linux, macOS, Windows + +## Decision + +Integrate **typdialog** with its **Web UI backend** as the standard interactive configuration interface for the provisioning platform. The major achievement of typdialog is not the TUI - it is the Web UI backend that enables browser-based forms, multi-user collaboration, and seamless integration with the provisioning orchestrator. 
+ +### Architecture Diagram + +``` +┌─────────────────────────────────────────┐ +│ Nushell Script │ +│ │ +│ provisioning workspace init │ +│ provisioning config setup │ +│ provisioning deploy guided │ +└────────────┬────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────┐ +│ Rust CLI Handler │ +│ (provisioning/core/cli/) │ +│ │ +│ - Parse command │ +│ - Determine if interactive needed │ +│ - Invoke TUI dialog module │ +└────────────┬────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────┐ +│ TUI Dialog Module │ +│ (typdialog wrapper) │ +│ │ +│ - Form definition (validation rules) │ +│ - Widget rendering (text, select) │ +│ - User input capture │ +│ - Validation execution │ +│ - Result serialization (JSON/TOML) │ +└────────────┬────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────┐ +│ typdialog Library │ +│ │ +│ - Terminal rendering (crossterm) │ +│ - Event handling (keyboard, mouse) │ +│ - Widget state management │ +│ - Input validation engine │ +└────────────┬────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────┐ +│ Terminal (stdout/stdin) │ +│ │ +│ ✅ Rich TUI with validation │ +│ ✅ Secure password input │ +│ ✅ Guided multi-step forms │ +└─────────────────────────────────────────┘ +``` + +### Implementation Characteristics + +**CLI Integration Provides**: + +- ✅ Native Rust commands with TUI dialogs +- ✅ Form-based input for complex configurations +- ✅ Validation rules defined in Rust (type-safe) +- ✅ Secure input (password masking, no history) +- ✅ Error handling with retry logic +- ✅ Serialization to Nickel/TOML/JSON + +**TUI Dialog Library Handles**: + +- ✅ Terminal UI rendering and event loop +- ✅ Widget management (text, select, checkbox, confirm) +- ✅ Input validation and error display +- ✅ Navigation (arrow keys, tab, enter) +- ✅ Cross-platform terminal compatibility + +## Rationale + +### Why TUI Dialog Integration Is Required + +| Aspect | Shell Prompts (current) | Web Forms | TUI Dialog (chosen) | +|--------|-------------------------|-----------|---------------------| +| **User Experience** | ❌ Basic text only | ✅ Rich UI | ✅ Rich TUI | +| **Validation** | ❌ Manual, error-prone | ✅ Built-in | ✅ Built-in | +| **Security** | ❌ Plain text, history | ⚠️ Network risk | ✅ Secure terminal | +| **Setup Complexity** | ✅ None | ❌ Server required | ✅ Minimal | +| **Terminal Workflow** | ✅ Native | ❌ Browser switch | ✅ Native | +| **Offline Support** | ✅ Always | ❌ Requires server | ✅ Always | +| **Dependencies** | ✅ None | ❌ Web stack | ✅ Single crate | +| **Error Handling** | ❌ Manual | ⚠️ Complex | ✅ Built-in retry | + +### The Nushell Limitation + +Nushell's `input` command is limited: + +```nushell +# Current: No validation, no security +let password = input "Password: " # ❌ Shows in terminal +let region = input "AWS Region: " # ❌ No autocomplete/validation + +# Cannot do: +# - Multi-select from options +# - Conditional fields (if X then ask Y) +# - Password masking +# - Real-time validation +# - Autocomplete/fuzzy search +``` + +### The Nickel Constraint + +Nickel is declarative and cannot prompt users: + +```nickel +# Nickel defines what the config looks like, NOT how to get it +{ + database = { + host | String, + port | Number, + credentials | { username: String, password: String }, + } +} + +# Nickel cannot: +# - Prompt user for values +# - Show interactive forms +# - Validate input interactively +``` + +### Why Rust + TUI Dialog Is The Solution + +**Rust provides**: +- 
Native terminal control (crossterm, termion) +- Type-safe form definitions +- Validation rules as functions +- Secure memory handling (password zeroization) +- Performance (no subprocess overhead) + +**TUI Dialog provides**: +- Widget library (text, select, multi-select, confirm) +- Event loop and rendering +- Validation framework +- Error display and retry logic + +**Integration enables**: +- Nushell calls Rust CLI → Shows TUI dialog → Returns validated config +- Nickel receives validated config → Type checks → Merges with defaults + +## Consequences + +### Positive + +- **User Experience**: Professional TUI with validation and guidance +- **Security**: Password masking, sensitive data protection, no terminal history +- **Validation**: Type-safe rules enforced before config generation +- **Developer Experience**: Reusable form components across CLI commands +- **Error Handling**: Clear validation errors with retry options +- **Offline First**: No network dependencies for interactive input +- **Terminal Native**: Fits CLI workflow, no context switching +- **Maintainability**: Single library for all interactive input + +### Negative + +- **Terminal Dependency**: Requires interactive terminal (not scriptable) +- **Learning Curve**: Developers must learn TUI dialog patterns +- **Library Lock-in**: Tied to specific TUI library API +- **Testing Complexity**: Interactive tests require terminal mocking +- **Non-Interactive Fallback**: Need alternative for CI/CD and scripts + +### Mitigation Strategies + +**Non-Interactive Mode**: +```rust +// Support both interactive and non-interactive +if terminal::is_interactive() { + // Show TUI dialog + let config = show_workspace_form()?; +} else { + // Use config file or CLI args + let config = load_config_from_file(args.config)?; +} +``` + +**Testing**: +```rust +// Unit tests: Test form validation logic (no TUI) +#[test] +fn test_validate_workspace_name() { + assert!(validate_name("my-workspace").is_ok()); + assert!(validate_name("invalid name!").is_err()); +} + +// Integration tests: Use mock terminal or config files +``` + +**Scriptability**: +```bash +# Batch mode: Provide config via file +provisioning workspace init --config workspace.toml + +# Interactive mode: Show TUI dialog +provisioning workspace init --interactive +``` + +**Documentation**: +- Form schemas documented in `docs/` +- Config file examples provided +- Screenshots of TUI forms in guides + +## Alternatives Considered + +### Alternative 1: Shell-Based Prompts (Current State) + +**Pros**: Simple, no dependencies +**Cons**: No validation, poor UX, security risks +**Decision**: REJECTED - Inadequate for production use + +### Alternative 2: Web-Based Forms + +**Pros**: Rich UI, well-known patterns +**Cons**: Requires server, network dependency, context switch +**Decision**: REJECTED - Too complex for CLI tool + +### Alternative 3: Custom TUI Per Use Case + +**Pros**: Tailored to each need +**Cons**: High maintenance, code duplication, inconsistent UX +**Decision**: REJECTED - Not sustainable + +### Alternative 4: External Form Tool (dialog, whiptail) + +**Pros**: Mature, cross-platform +**Cons**: Subprocess overhead, limited validation, shell escaping issues +**Decision**: REJECTED - Poor Rust integration + +### Alternative 5: Text-Based Config Files Only + +**Pros**: Fully scriptable, no interactive complexity +**Cons**: Steep learning curve, no guidance for new users +**Decision**: REJECTED - Poor user onboarding experience + +## Implementation Details + +### Form Definition Pattern + 
+```rust
+use typdialog::Form;
+
+pub fn workspace_initialization_form() -> Result<WorkspaceConfig> {
+    let form = Form::new("Workspace Initialization")
+        .add_text_input("name", "Workspace Name")
+            .required()
+            .validator(|s| validate_workspace_name(s))
+        .add_select("provider", "Cloud Provider")
+            .options(&["aws", "azure", "oci", "local"])
+            .required()
+        .add_text_input("region", "Region")
+            .default("us-west-2")
+            .validator(|s| validate_region(s))
+        .add_password("admin_password", "Admin Password")
+            .required()
+            .min_length(12)
+        .add_confirm("enable_monitoring", "Enable Monitoring?")
+            .default(true);
+
+    let responses = form.run()?;
+
+    // Convert to strongly-typed config
+    let config = WorkspaceConfig {
+        name: responses.get_string("name")?,
+        provider: responses.get_string("provider")?.parse()?,
+        region: responses.get_string("region")?,
+        admin_password: responses.get_password("admin_password")?,
+        enable_monitoring: responses.get_bool("enable_monitoring")?,
+    };
+
+    Ok(config)
+}
+```
+
+### Integration with Nickel
+
+```rust
+// 1. Get validated input from TUI dialog
+let config = workspace_initialization_form()?;
+
+// 2. Serialize to TOML/JSON
+let config_toml = toml::to_string(&config)?;
+
+// 3. Write to workspace config
+fs::write("workspace/config.toml", config_toml)?;
+
+// 4. Nickel merges with defaults
+// nickel export workspace/main.ncl --format json
+// (uses workspace/config.toml as input)
+```
+
+### CLI Command Structure
+
+```rust
+// provisioning/core/cli/src/commands/workspace.rs
+
+#[derive(Parser)]
+pub enum WorkspaceCommand {
+    Init {
+        #[arg(long)]
+        interactive: bool,
+
+        #[arg(long)]
+        config: Option<PathBuf>,
+    },
+}
+
+pub fn handle_workspace_init(args: InitArgs) -> Result<()> {
+    if args.interactive || terminal::is_interactive() {
+        // Show TUI dialog
+        let config = workspace_initialization_form()?;
+        config.save("workspace/config.toml")?;
+    } else if let Some(config_path) = args.config {
+        // Use provided config
+        let config = WorkspaceConfig::load(config_path)?;
+        config.save("workspace/config.toml")?;
+    } else {
+        bail!("Either --interactive or --config required");
+    }
+
+    // Continue with workspace setup
+    Ok(())
+}
+```
+
+### Validation Rules
+
+```rust
+pub fn validate_workspace_name(name: &str) -> Result<(), String> {
+    // Alphanumeric, hyphens, 3-32 chars
+    let re = Regex::new(r"^[a-z0-9-]{3,32}$").unwrap();
+    if !re.is_match(name) {
+        return Err("Name must be 3-32 lowercase alphanumeric chars with hyphens".into());
+    }
+    Ok(())
+}
+
+pub fn validate_region(region: &str) -> Result<(), String> {
+    const VALID_REGIONS: &[&str] = &["us-west-1", "us-west-2", "us-east-1", "eu-west-1"];
+    if !VALID_REGIONS.contains(&region) {
+        return Err(format!("Invalid region. Must be one of: {}", VALID_REGIONS.join(", ")));
+    }
+    Ok(())
+}
+```
+
+### Security: Password Handling
+
+```rust
+use zeroize::Zeroizing;
+
+pub fn get_secure_password() -> Result<Zeroizing<String>> {
+    let form = Form::new("Secure Input")
+        .add_password("password", "Password")
+            .required()
+            .min_length(12)
+            .validator(password_strength_check);
+
+    let responses = form.run()?;
+
+    // Password automatically zeroized when dropped
+    let password = Zeroizing::new(responses.get_password("password")?);
+
+    Ok(password)
+}
+```
+
+## Testing Strategy
+
+**Unit Tests**:
+```rust
+#[test]
+fn test_workspace_name_validation() {
+    assert!(validate_workspace_name("my-workspace").is_ok());
+    assert!(validate_workspace_name("UPPERCASE").is_err());
+    assert!(validate_workspace_name("ab").is_err()); // Too short
+}
+```
+
+**Integration Tests**:
+```rust
+// Use non-interactive mode with config files
+#[test]
+fn test_workspace_init_non_interactive() {
+    let config = WorkspaceConfig {
+        name: "test-workspace".into(),
+        provider: Provider::Local,
+        region: "us-west-2".into(),
+        admin_password: "secure-password-123".into(),
+        enable_monitoring: true,
+    };
+
+    config.save("/tmp/test-config.toml").unwrap();
+
+    let result = handle_workspace_init(InitArgs {
+        interactive: false,
+        config: Some("/tmp/test-config.toml".into()),
+    });
+
+    assert!(result.is_ok());
+}
+```
+
+**Manual Testing**:
+```bash
+# Test interactive flow
+cargo build --release
+./target/release/provisioning workspace init --interactive
+
+# Test validation errors
+# - Try invalid workspace name
+# - Try weak password
+# - Try invalid region
+```
+
+## Configuration Integration
+
+**CLI Flag**:
+```toml
+# provisioning/config/config.defaults.toml
+[ui]
+interactive_mode = "auto"  # "auto" | "always" | "never"
+dialog_theme = "default"   # "default" | "minimal" | "colorful"
+```
+
+**Environment Override**:
+```bash
+# Force non-interactive mode (for CI/CD)
+export PROVISIONING_INTERACTIVE=false
+
+# Force interactive mode
+export PROVISIONING_INTERACTIVE=true
+```
+
+## Documentation Requirements
+
+**User Guides**:
+- `docs/user/interactive-configuration.md` - How to use TUI dialogs
+- `docs/guides/workspace-setup.md` - Workspace initialization with screenshots
+
+**Developer Documentation**:
+- `docs/development/tui-forms.md` - Creating new TUI forms
+- Form definition best practices
+- Validation rule patterns
+
+**Configuration Schema**:
+```nickel
+# provisioning/schemas/workspace.ncl
+{
+  WorkspaceConfig = {
+    name
+      | doc "Workspace identifier (3-32 alphanumeric chars with hyphens)"
+      | String,
+    provider
+      | doc "Cloud provider"
+      | [| 'aws, 'azure, 'oci, 'local |],
+    region
+      | doc "Deployment region"
+      | String,
+    admin_password
+      | doc "Admin password (min 12 characters)"
+      | String,
+    enable_monitoring
+      | doc "Enable monitoring services"
+      | Bool,
+  }
+}
+```
+
+## Migration Path
+
+**Phase 1: Add Library**
+- Add typdialog dependency to `provisioning/core/cli/Cargo.toml`
+- Create TUI dialog wrapper module
+- Implement basic text/select widgets
+
+**Phase 2: Implement Forms**
+- Workspace initialization form
+- Credential setup form
+- Configuration wizard forms
+
+**Phase 3: CLI Integration**
+- Update CLI commands to use TUI dialogs
+- Add `--interactive` / `--config` flags
+- Implement non-interactive fallback
+
+**Phase 4: Documentation**
+- User guides with screenshots
+- Developer documentation for form creation
+- Example configs for non-interactive use
+
+**Phase 5: Testing**
+- Unit tests for validation logic
+- Integration tests 
with config files +- Manual testing on all platforms + +## References + +- [typdialog Crate](https://crates.io/crates/typdialog) (or similar: dialoguer, inquire) +- [crossterm](https://crates.io/crates/crossterm) - Terminal manipulation +- [zeroize](https://crates.io/crates/zeroize) - Secure memory zeroization +- ADR-004: Hybrid Architecture (Rust/Nushell integration) +- ADR-011: Nickel Migration (declarative config language) +- ADR-012: Nushell Plugins (CLI wrapper patterns) +- Nushell `input` command limitations: [Nushell Book - Input](https://www.nushell.sh/commands/docs/input.html) + +--- + +**Status**: Accepted +**Last Updated**: 2025-01-08 +**Implementation**: Planned +**Priority**: High (User onboarding and security) +**Estimated Complexity**: Moderate diff --git a/docs/src/architecture/adr/adr-014-secretumvault-integration.md b/docs/src/architecture/adr/adr-014-secretumvault-integration.md new file mode 100644 index 0000000..bfe08b4 --- /dev/null +++ b/docs/src/architecture/adr/adr-014-secretumvault-integration.md @@ -0,0 +1,657 @@ +# ADR-014: SecretumVault Integration for Secrets Management + +## Status + +**Accepted** - 2025-01-08 + +## Context + +The provisioning system manages sensitive data across multiple infrastructure layers: cloud provider credentials, database passwords, API keys, SSH keys, encryption keys, and service tokens. The current security architecture (ADR-009) includes SOPS for encrypted config files and Age for key management, but lacks a centralized secrets management solution with dynamic secrets, access control, and audit logging. + +### Current Secrets Management Challenges + +**Existing Approach**: + +1. **SOPS + Age**: Static secrets encrypted in config files + - Good: Version-controlled, gitops-friendly + - Limited: Static rotation, no audit trail, manual key distribution + +2. **Nickel Configuration**: Declarative secrets references + - Good: Type-safe configuration + - Limited: Cannot generate dynamic secrets, no lifecycle management + +3. **Manual Secret Injection**: Environment variables, CLI flags + - Good: Simple for development + - Limited: No security guarantees, prone to leakage + +### Problems Without Centralized Secrets Management + +**Security Issues**: +- ❌ No centralized audit trail (who accessed which secret when) +- ❌ No automatic secret rotation policies +- ❌ No fine-grained access control (Cedar policies not enforced on secrets) +- ❌ Secrets scattered across: SOPS files, env vars, config files, K8s secrets +- ❌ No detection of secret sprawl or leaked credentials + +**Operational Issues**: +- ❌ Manual secret rotation (error-prone, often neglected) +- ❌ No secret versioning (cannot rollback to previous credentials) +- ❌ Difficult onboarding (manual key distribution) +- ❌ No dynamic secrets (credentials exist indefinitely) + +**Compliance Issues**: +- ❌ Cannot prove compliance with secret access policies +- ❌ No audit logs for regulatory requirements +- ❌ Cannot enforce secret expiration policies +- ❌ Difficult to demonstrate least-privilege access + +### Use Cases Requiring Centralized Secrets Management + +1. **Dynamic Database Credentials**: + - Generate short-lived DB credentials for applications + - Automatic rotation based on policies + - Revocation on application termination + +2. **Cloud Provider API Keys**: + - Centralized storage with access control + - Audit trail of credential usage + - Automatic rotation schedules + +3. 
**Service-to-Service Authentication**: + - Dynamic tokens for microservices + - Short-lived certificates for mTLS + - Automatic renewal before expiration + +4. **SSH Key Management**: + - Temporal SSH keys (ADR-009 SSH integration) + - Centralized certificate authority + - Audit trail of SSH access + +5. **Encryption Key Management**: + - Master encryption keys for data at rest + - Key rotation and versioning + - Integration with KMS systems + +### Requirements for Secrets Management System + +- ✅ **Dynamic Secrets**: Generate credentials on-demand with TTL +- ✅ **Access Control**: Integration with Cedar authorization policies +- ✅ **Audit Logging**: Complete trail of secret access and modifications +- ✅ **Secret Rotation**: Automatic and manual rotation policies +- ✅ **Versioning**: Track secret versions, enable rollback +- ✅ **High Availability**: Distributed, fault-tolerant architecture +- ✅ **Encryption at Rest**: AES-256-GCM for stored secrets +- ✅ **API-First**: RESTful API for integration +- ✅ **Plugin Ecosystem**: Extensible backends (AWS, Azure, databases) +- ✅ **Open Source**: Self-hosted, no vendor lock-in + +## Decision + +Integrate **SecretumVault** as the centralized secrets management system for the provisioning platform. + +### Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Provisioning CLI / Orchestrator / Services │ +│ │ +│ - Workspace initialization (credentials) │ +│ - Infrastructure deployment (cloud API keys) │ +│ - Service configuration (database passwords) │ +│ - SSH temporal keys (certificate generation) │ +└────────────┬────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ SecretumVault Client Library (Rust) │ +│ (provisioning/core/libs/secretum-client/) │ +│ │ +│ - Authentication (token, mTLS) │ +│ - Secret CRUD operations │ +│ - Dynamic secret generation │ +│ - Lease renewal and revocation │ +│ - Policy enforcement │ +└────────────┬────────────────────────────────────────────────┘ + │ HTTPS + mTLS + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ SecretumVault Server │ +│ (Rust-based Vault implementation) │ +│ │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ API Layer (REST + gRPC) │ │ +│ ├───────────────────────────────────────────────────┤ │ +│ │ Authentication & Authorization │ │ +│ │ - Token auth, mTLS, OIDC integration │ │ +│ │ - Cedar policy enforcement │ │ +│ ├───────────────────────────────────────────────────┤ │ +│ │ Secret Engines │ │ +│ │ - KV (key-value v2 with versioning) │ │ +│ │ - Database (dynamic credentials) │ │ +│ │ - SSH (certificate authority) │ │ +│ │ - PKI (X.509 certificates) │ │ +│ │ - Cloud Providers (AWS/Azure/OCI) │ │ +│ ├───────────────────────────────────────────────────┤ │ +│ │ Storage Backend │ │ +│ │ - Encrypted storage (AES-256-GCM) │ │ +│ │ - PostgreSQL / Raft cluster │ │ +│ ├───────────────────────────────────────────────────┤ │ +│ │ Audit Backend │ │ +│ │ - Structured logging (JSON) │ │ +│ │ - Syslog, file, database sinks │ │ +│ └───────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Backends (Dynamic Secret Generation) │ +│ │ +│ - PostgreSQL/MySQL (database credentials) │ +│ - AWS IAM (temporary access keys) │ +│ - Azure AD (service principals) │ +│ - SSH CA (signed certificates) │ +│ - PKI (X.509 certificates) │ 
+└─────────────────────────────────────────────────────────────┘ +``` + +### Implementation Characteristics + +**SecretumVault Provides**: + +- ✅ Dynamic secret generation with configurable TTL +- ✅ Secret versioning and rollback capabilities +- ✅ Fine-grained access control (Cedar policies) +- ✅ Complete audit trail (all operations logged) +- ✅ Automatic secret rotation policies +- ✅ High availability (Raft consensus) +- ✅ Encryption at rest (AES-256-GCM) +- ✅ Plugin architecture for secret backends +- ✅ RESTful and gRPC APIs +- ✅ Rust implementation (performance, safety) + +**Integration with Provisioning System**: + +- ✅ Rust client library (native integration) +- ✅ Nushell commands via CLI wrapper +- ✅ Nickel configuration references secrets +- ✅ Cedar policies control secret access +- ✅ Orchestrator manages secret lifecycle +- ✅ SSH integration for temporal keys +- ✅ KMS integration for encryption keys + +## Rationale + +### Why SecretumVault Is Required + +| Aspect | SOPS + Age (current) | HashiCorp Vault | SecretumVault (chosen) | +|--------|----------------------|-----------------|------------------------| +| **Dynamic Secrets** | ❌ Static only | ✅ Full support | ✅ Full support | +| **Rust Native** | ⚠️ External CLI | ❌ Go binary | ✅ Pure Rust | +| **Cedar Integration** | ❌ None | ❌ Custom policies | ✅ Native Cedar | +| **Audit Trail** | ❌ Git only | ✅ Comprehensive | ✅ Comprehensive | +| **Secret Rotation** | ❌ Manual | ✅ Automatic | ✅ Automatic | +| **Open Source** | ✅ Yes | ⚠️ MPL 2.0 (BSL now) | ✅ Yes | +| **Self-Hosted** | ✅ Yes | ✅ Yes | ✅ Yes | +| **License** | ✅ Permissive | ⚠️ BSL (proprietary) | ✅ Permissive | +| **Versioning** | ⚠️ Git commits | ✅ Built-in | ✅ Built-in | +| **High Availability** | ❌ Single file | ✅ Raft cluster | ✅ Raft cluster | +| **Performance** | ✅ Fast (local) | ⚠️ Network latency | ✅ Rust performance | + +### Why Not Continue with SOPS Alone? + +SOPS is excellent for **static secrets in git**, but inadequate for: + +1. **Dynamic Credentials**: Cannot generate temporary DB passwords +2. **Audit Trail**: Git commits are insufficient for compliance +3. **Rotation Policies**: Manual rotation is error-prone +4. **Access Control**: No runtime policy enforcement +5. **Secret Lifecycle**: Cannot track usage or revoke access +6. **Multi-System Integration**: Limited to files, not API-accessible + +**Complementary Approach**: +- SOPS: Configuration files with long-lived secrets (gitops workflow) +- SecretumVault: Runtime dynamic secrets, short-lived credentials, audit trail + +### Why SecretumVault Over HashiCorp Vault? + +**HashiCorp Vault Limitations**: + +1. **License Change**: BSL (Business Source License) - proprietary for production +2. **Not Rust Native**: Go binary, subprocess overhead +3. **Custom Policy Language**: HCL policies, not Cedar (provisioning standard) +4. **Complex Deployment**: Heavy operational burden +5. **Vendor Lock-In**: HashiCorp ecosystem dependency + +**SecretumVault Advantages**: + +1. **Rust Native**: Zero-cost integration, no subprocess spawning +2. **Cedar Policies**: Consistent with ADR-008 authorization model +3. **Lightweight**: Smaller binary, lower resource usage +4. **Open Source**: Permissive license, community-driven +5. 
**Provisioning-First**: Designed for IaC workflows + +### Integration with Existing Security Architecture + +**ADR-009 (Security System)**: +- SOPS: Static config encryption (unchanged) +- Age: Key management for SOPS (unchanged) +- SecretumVault: Dynamic secrets, runtime access control (new) + +**ADR-008 (Cedar Authorization)**: +- Cedar policies control SecretumVault secret access +- Fine-grained permissions: `read:secret:database/prod/password` +- Audit trail records Cedar policy decisions + +**SSH Temporal Keys**: +- SecretumVault SSH CA signs user certificates +- Short-lived certificates (1-24 hours) +- Audit trail of SSH access + +## Consequences + +### Positive + +- **Security Posture**: Centralized secrets with audit trail and rotation +- **Compliance**: Complete audit logs for regulatory requirements +- **Operational Excellence**: Automatic rotation, dynamic credentials +- **Developer Experience**: Simple API for secret access +- **Performance**: Rust implementation, zero-cost abstractions +- **Consistency**: Cedar policies across entire system (auth + secrets) +- **Observability**: Metrics, logs, traces for secret access +- **Disaster Recovery**: Secret versioning enables rollback + +### Negative + +- **Infrastructure Complexity**: Additional service to deploy and operate +- **High Availability Requirements**: Raft cluster needs 3+ nodes +- **Migration Effort**: Existing SOPS secrets need migration path +- **Learning Curve**: Operators must learn vault concepts +- **Dependency Risk**: Critical path service (secrets unavailable = system down) + +### Mitigation Strategies + +**High Availability**: +```bash +# Deploy SecretumVault cluster (3 nodes) +provisioning deploy secretum-vault --ha --replicas 3 + +# Automatic leader election via Raft +# Clients auto-reconnect to leader +``` + +**Migration from SOPS**: +```bash +# Phase 1: Import existing SOPS secrets into SecretumVault +provisioning secrets migrate --from-sops config/secrets.yaml + +# Phase 2: Update Nickel configs to reference vault paths +# Phase 3: Deprecate SOPS for runtime secrets (keep for config files) +``` + +**Fallback Strategy**: +```rust +// Graceful degradation if vault unavailable +let secret = match vault_client.get_secret("database/password").await { + Ok(s) => s, + Err(VaultError::Unavailable) => { + // Fallback to SOPS for read-only operations + warn!("Vault unavailable, using SOPS fallback"); + sops_decrypt("config/secrets.yaml", "database.password")? 
+    },
+    Err(e) => return Err(e),
+};
+```
+
+**Operational Monitoring**:
+```toml
+# prometheus metrics
+secretum_vault_request_duration_seconds
+secretum_vault_secret_lease_expiry
+secretum_vault_auth_failures_total
+secretum_vault_raft_leader_changes
+
+# Alerts: Vault unavailable, high auth failure rate, lease expiry
+```
+
+## Alternatives Considered
+
+### Alternative 1: Continue with SOPS Only
+
+**Pros**: No new infrastructure, simple
+**Cons**: No dynamic secrets, no audit trail, manual rotation
+**Decision**: REJECTED - Insufficient for production security
+
+### Alternative 2: HashiCorp Vault
+
+**Pros**: Mature, feature-rich, widely adopted
+**Cons**: BSL license, Go binary, HCL policies (not Cedar), complex deployment
+**Decision**: REJECTED - License and integration concerns
+
+### Alternative 3: Cloud Provider Native (AWS Secrets Manager, Azure Key Vault)
+
+**Pros**: Fully managed, high availability
+**Cons**: Vendor lock-in, multi-cloud complexity, cost at scale
+**Decision**: REJECTED - Against open-source and multi-cloud principles
+
+### Alternative 4: CyberArk, 1Password, etc.
+
+**Pros**: Enterprise features
+**Cons**: Proprietary, expensive, poor API integration
+**Decision**: REJECTED - Not suitable for IaC automation
+
+### Alternative 5: Build Custom Secrets Manager
+
+**Pros**: Full control, tailored to needs
+**Cons**: High maintenance burden, security risk, reinventing the wheel
+**Decision**: REJECTED - SecretumVault provides this already
+
+## Implementation Details
+
+### SecretumVault Deployment
+
+```bash
+# Deploy via provisioning system
+provisioning deploy secretum-vault \
+  --ha \
+  --replicas 3 \
+  --storage postgres \
+  --tls-cert /path/to/cert.pem \
+  --tls-key /path/to/key.pem
+
+# Initialize and unseal
+provisioning vault init
+provisioning vault unseal --key-shares 5 --key-threshold 3
+```
+
+### Rust Client Library
+
+```rust
+// provisioning/core/libs/secretum-client/src/lib.rs
+
+use secretum_vault::{Client, SecretEngine, Auth, TlsConfig};
+
+pub struct VaultClient {
+    client: Client,
+}
+
+impl VaultClient {
+    pub async fn new(addr: &str, token: &str) -> Result<Self> {
+        let client = Client::new(addr)
+            .auth(Auth::Token(token))
+            .tls_config(TlsConfig::from_files("ca.pem", "cert.pem", "key.pem"))?
+ .build()?; + + Ok(Self { client }) + } + + pub async fn get_secret(&self, path: &str) -> Result { + self.client.kv2().get(path).await + } + + pub async fn create_dynamic_db_credentials(&self, role: &str) -> Result { + self.client.database().generate_credentials(role).await + } + + pub async fn sign_ssh_key(&self, public_key: &str, ttl: Duration) -> Result { + self.client.ssh().sign_key(public_key, ttl).await + } +} +``` + +### Nushell Integration + +```nushell +# Nushell commands via Rust CLI wrapper +provisioning secrets get database/prod/password +provisioning secrets set api/keys/stripe --value "sk_live_xyz" +provisioning secrets rotate database/prod/password +provisioning secrets lease renew lease_id_12345 +provisioning secrets list database/ +``` + +### Nickel Configuration Integration + +```nickel +# provisioning/schemas/database.ncl +{ + database = { + host = "postgres.example.com", + port = 5432, + username = secrets.get "database/prod/username", + password = secrets.get "database/prod/password", + } +} + +# Nickel function: secrets.get resolves to SecretumVault API call +``` + +### Cedar Policy for Secret Access + +```cedar +// policy: developers can read dev secrets, not prod +permit( + principal in Group::"developers", + action == Action::"read", + resource in Secret::"database/dev" +); + +forbid( + principal in Group::"developers", + action == Action::"read", + resource in Secret::"database/prod" +); + +// policy: CI/CD can generate dynamic DB credentials +permit( + principal == Service::"github-actions", + action == Action::"generate", + resource in Secret::"database/dynamic" +) when { + context.ttl <= duration("1h") +}; +``` + +### Dynamic Database Credentials + +```rust +// Application requests temporary DB credentials +let creds = vault_client + .database() + .generate_credentials("postgres-readonly") + .await?; + +println!("Username: {}", creds.username); // v-app-abcd1234 +println!("Password: {}", creds.password); // random-secure-password +println!("TTL: {}", creds.lease_duration); // 1h + +// Credentials automatically revoked after TTL +// No manual cleanup needed +``` + +### Secret Rotation Automation + +```toml +# secretum-vault config +[[rotation_policies]] +path = "database/prod/password" +schedule = "0 0 * * 0" # Weekly on Sunday midnight +max_age = "30d" + +[[rotation_policies]] +path = "api/keys/stripe" +schedule = "0 0 1 * *" # Monthly on 1st +max_age = "90d" +``` + +### Audit Log Format + +```json +{ + "timestamp": "2025-01-08T12:34:56Z", + "type": "request", + "auth": { + "client_token": "sha256:abc123...", + "accessor": "hmac:def456...", + "display_name": "service-orchestrator", + "policies": ["default", "service-policy"] + }, + "request": { + "operation": "read", + "path": "secret/data/database/prod/password", + "remote_address": "10.0.1.5" + }, + "response": { + "status": 200 + }, + "cedar_policy": { + "decision": "permit", + "policy_id": "allow-orchestrator-read-secrets" + } +} +``` + +## Testing Strategy + +**Unit Tests**: +```rust +#[tokio::test] +async fn test_get_secret() { + let vault = mock_vault_client(); + let secret = vault.get_secret("test/secret").await.unwrap(); + assert_eq!(secret.value, "expected-value"); +} + +#[tokio::test] +async fn test_dynamic_credentials_generation() { + let vault = mock_vault_client(); + let creds = vault.create_dynamic_db_credentials("postgres-readonly").await.unwrap(); + assert!(creds.username.starts_with("v-")); + assert_eq!(creds.lease_duration, Duration::from_secs(3600)); +} +``` + +**Integration Tests**: 
+```bash +# Test vault deployment +provisioning deploy secretum-vault --test-mode +provisioning vault init +provisioning vault unseal + +# Test secret operations +provisioning secrets set test/secret --value "test-value" +provisioning secrets get test/secret | assert "test-value" + +# Test dynamic credentials +provisioning secrets db-creds postgres-readonly | jq '.username' | assert-contains "v-" + +# Test rotation +provisioning secrets rotate test/secret +``` + +**Security Tests**: +```rust +#[tokio::test] +async fn test_unauthorized_access_denied() { + let vault = vault_client_with_limited_token(); + let result = vault.get_secret("database/prod/password").await; + assert!(matches!(result, Err(VaultError::PermissionDenied))); +} +``` + +## Configuration Integration + +**Provisioning Config**: +```toml +# provisioning/config/config.defaults.toml +[secrets] +provider = "secretum-vault" # "secretum-vault" | "sops" | "env" +vault_addr = "https://vault.example.com:8200" +vault_namespace = "provisioning" +vault_mount = "secret" + +[secrets.tls] +ca_cert = "/etc/provisioning/vault-ca.pem" +client_cert = "/etc/provisioning/vault-client.pem" +client_key = "/etc/provisioning/vault-client-key.pem" + +[secrets.cache] +enabled = true +ttl = "5m" +max_size = "100MB" +``` + +**Environment Variables**: +```bash +export VAULT_ADDR="https://vault.example.com:8200" +export VAULT_TOKEN="s.abc123def456..." +export VAULT_NAMESPACE="provisioning" +export VAULT_CACERT="/etc/provisioning/vault-ca.pem" +``` + +## Migration Path + +**Phase 1: Deploy SecretumVault** +- Deploy vault cluster in HA mode +- Initialize and configure backends +- Set up Cedar policies + +**Phase 2: Migrate Static Secrets** +- Import SOPS secrets into vault KV store +- Update Nickel configs to reference vault paths +- Verify secret access via new API + +**Phase 3: Enable Dynamic Secrets** +- Configure database secret engine +- Configure SSH CA secret engine +- Update applications to use dynamic credentials + +**Phase 4: Deprecate SOPS for Runtime** +- SOPS remains for gitops config files +- Runtime secrets exclusively from vault +- Audit trail enforcement + +**Phase 5: Automation** +- Automatic rotation policies +- Lease renewal automation +- Monitoring and alerting + +## Documentation Requirements + +**User Guides**: +- `docs/user/secrets-management.md` - Using SecretumVault +- `docs/user/dynamic-credentials.md` - Dynamic secret workflows +- `docs/user/secret-rotation.md` - Rotation policies and procedures + +**Operations Documentation**: +- `docs/operations/vault-deployment.md` - Deploying and configuring vault +- `docs/operations/vault-backup-restore.md` - Backup and disaster recovery +- `docs/operations/vault-monitoring.md` - Metrics, logs, alerts + +**Developer Documentation**: +- `docs/development/secrets-api.md` - Rust client library usage +- `docs/development/cedar-secret-policies.md` - Writing Cedar policies for secrets +- Secret engine development guide + +**Security Documentation**: +- `docs/security/secrets-architecture.md` - Security architecture overview +- `docs/security/audit-logging.md` - Audit trail and compliance +- Threat model and risk assessment + +## References + +- [SecretumVault GitHub](https://github.com/secretum-vault/secretum) (hypothetical, replace with actual) +- [HashiCorp Vault Documentation](https://www.vaultproject.io/docs) (for comparison) +- ADR-008: Cedar Authorization (policy integration) +- ADR-009: Security System Complete (current security architecture) +- [Raft Consensus 
Algorithm](https://raft.github.io/) +- [Cedar Policy Language](https://www.cedarpolicy.com/) +- SOPS: [https://github.com/getsops/sops](https://github.com/getsops/sops) +- Age Encryption: [https://age-encryption.org/](https://age-encryption.org/) + +--- + +**Status**: Accepted +**Last Updated**: 2025-01-08 +**Implementation**: Planned +**Priority**: High (Security and compliance) +**Estimated Complexity**: Complex diff --git a/docs/src/architecture/adr/adr-015-ai-integration-architecture.md b/docs/src/architecture/adr/adr-015-ai-integration-architecture.md new file mode 100644 index 0000000..7fc3d7c --- /dev/null +++ b/docs/src/architecture/adr/adr-015-ai-integration-architecture.md @@ -0,0 +1,1115 @@ +# ADR-015: AI Integration Architecture for Intelligent Infrastructure Provisioning + +## Status + +**Accepted** - 2025-01-08 + +## Context + +The provisioning platform has evolved to include complex workflows for infrastructure configuration, deployment, and management. +Current interaction patterns require deep technical knowledge of Nickel schemas, cloud provider APIs, networking concepts, and security best practices. +This creates barriers to entry and slows down infrastructure provisioning for operators who are not infrastructure experts. + +### The Infrastructure Complexity Problem + +**Current state challenges**: + +1. **Knowledge Barrier**: Deep Nickel, cloud, and networking expertise required + - Understanding Nickel type system and contracts + - Knowing cloud provider resource relationships + - Configuring security policies correctly + - Debugging deployment failures + +2. **Manual Configuration**: All configs hand-written + - Repetitive boilerplate for common patterns + - Easy to make mistakes (typos, missing fields) + - No intelligent suggestions or autocomplete + - Trial-and-error debugging + +3. **Limited Assistance**: No contextual help + - Documentation is separate from workflow + - No explanation of validation errors + - No suggestions for fixing issues + - No learning from past deployments + +4. **Troubleshooting Difficulty**: Manual log analysis + - Deployment failures require expert analysis + - No automated root cause detection + - No suggested fixes based on similar issues + - Long time-to-resolution + +### AI Integration Opportunities + +1. **Natural Language to Configuration**: + - User: "Create a production PostgreSQL cluster with encryption and daily backups" + - AI: Generates validated Nickel configuration + +2. **AI-Assisted Form Filling**: + - User starts typing in typdialog web form + - AI suggests values based on context + - AI explains validation errors in plain language + +3. **Intelligent Troubleshooting**: + - Deployment fails + - AI analyzes logs and suggests fixes + - AI generates corrected configuration + +4. **Configuration Optimization**: + - AI analyzes workload patterns + - AI suggests performance improvements + - AI detects security misconfigurations + +5. **Learning from Operations**: + - AI indexes past deployments + - AI suggests configurations based on similar workloads + - AI predicts potential issues + +### AI Components Overview + +The system integrates multiple AI components: + +1. **typdialog-ai**: AI-assisted form interactions +2. **typdialog-ag**: AI agents for autonomous operations +3. **typdialog-prov-gen**: AI-powered configuration generation +4. **platform/crates/ai-service**: Core AI service backend +5. **platform/crates/mcp-server**: Model Context Protocol server +6. 
**platform/crates/rag**: Retrieval-Augmented Generation system + +### Requirements for AI Integration + +- ✅ **Natural Language Understanding**: Parse user intent from free-form text +- ✅ **Schema-Aware Generation**: Generate valid Nickel configurations +- ✅ **Context Retrieval**: Access documentation, schemas, past deployments +- ✅ **Security Enforcement**: Cedar policies control AI access +- ✅ **Human-in-the-Loop**: All AI actions require human approval +- ✅ **Audit Trail**: Complete logging of AI operations +- ✅ **Multi-Provider Support**: OpenAI, Anthropic, local models +- ✅ **Cost Control**: Rate limiting and budget management +- ✅ **Observability**: Trace AI decisions and reasoning + +## Decision + +Integrate a **comprehensive AI system** consisting of: + +1. **AI-Assisted Interfaces** (typdialog-ai) +2. **Autonomous AI Agents** (typdialog-ag) +3. **AI Configuration Generator** (typdialog-prov-gen) +4. **Core AI Infrastructure** (ai-service, mcp-server, rag) + +All AI components are **schema-aware**, **security-enforced**, and **human-supervised**. + +### Architecture Diagram + +```text +┌─────────────────────────────────────────────────────────────────┐ +│ User Interfaces │ +│ │ +│ Natural Language: "Create production K8s cluster in AWS" │ +│ Typdialog Forms: AI-assisted field suggestions │ +│ CLI: provisioning ai generate-config "description" │ +└────────────┬────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ AI Frontend Layer │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ typdialog-ai (AI-Assisted Forms) │ │ +│ │ - Natural language form filling │ │ +│ │ - Real-time AI suggestions │ │ +│ │ - Validation error explanations │ │ +│ │ - Context-aware autocomplete │ │ +│ ├───────────────────────────────────────────────────────┤ │ +│ │ typdialog-ag (AI Agents) │ │ +│ │ - Autonomous task execution │ │ +│ │ - Multi-step workflow automation │ │ +│ │ - Learning from feedback │ │ +│ │ - Agent collaboration │ │ +│ ├───────────────────────────────────────────────────────┤ │ +│ │ typdialog-prov-gen (Config Generator) │ │ +│ │ - Natural language → Nickel config │ │ +│ │ - Template-based generation │ │ +│ │ - Best practice injection │ │ +│ │ - Validation and refinement │ │ +│ └───────────────────────────────────────────────────────┘ │ +└────────────┬────────────────────────────────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────────────────────────┐ +│ Core AI Infrastructure (platform/crates/) │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ ai-service (Central AI Service) │ │ +│ │ │ │ +│ │ - Request routing and orchestration │ │ +│ │ - Authentication and authorization (Cedar) │ │ +│ │ - Rate limiting and cost control │ │ +│ │ - Caching and optimization │ │ +│ │ - Audit logging and observability │ │ +│ │ - Multi-provider abstraction │ │ +│ └─────────────┬─────────────────────┬───────────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────────────────┐ ┌─────────────────────┐ │ +│ │ mcp-server │ │ rag │ │ +│ │ (Model Context │ │ (Retrieval-Aug Gen) │ │ +│ │ Protocol) │ │ │ │ +│ │ │ │ ┌─────────────────┐ │ │ +│ │ - LLM integration │ │ │ Vector Store │ │ │ +│ │ - Tool calling │ │ │ (Qdrant/Milvus) │ │ │ +│ │ - Context mgmt │ │ └─────────────────┘ │ │ +│ │ - Multi-provider │ │ ┌─────────────────┐ │ │ +│ │ (OpenAI, │ │ │ Embeddings │ │ │ +│ │ Anthropic, │ │ │ (text-embed) │ │ │ +│ │ Local models) │ │ └─────────────────┘ │ │ +│ │ │ │ ┌─────────────────┐ │ │ +│ 
│ Tools: │ │ │ Index: │ │ │ +│ │ - nickel_validate │ │ │ - Nickel schemas│ │ │ +│ │ - schema_query │ │ │ - Documentation │ │ │ +│ │ - config_generate │ │ │ - Past deploys │ │ │ +│ │ - cedar_check │ │ │ - Best practices│ │ │ +│ └─────────────────────┘ │ └─────────────────┘ │ │ +│ │ │ │ +│ │ Query: "How to │ │ +│ │ configure Postgres │ │ +│ │ with encryption?" │ │ +│ │ │ │ +│ │ Retrieval: Relevant │ │ +│ │ docs + examples │ │ +│ └─────────────────────┘ │ +└────────────┬───────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Integration Points │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────────┐ │ +│ │ Nickel │ │ SecretumVault│ │ Cedar Authorization │ │ +│ │ Validation │ │ (Secrets) │ │ (AI Policies) │ │ +│ └─────────────┘ └──────────────┘ └─────────────────────┘ │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────────┐ │ +│ │ Orchestrator│ │ Typdialog │ │ Audit Logging │ │ +│ │ (Deploy) │ │ (Forms) │ │ (All AI Ops) │ │ +│ └─────────────┘ └──────────────┘ └─────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Output: Validated Nickel Configuration │ +│ │ +│ ✅ Schema-validated │ +│ ✅ Security-checked (Cedar policies) │ +│ ✅ Human-approved │ +│ ✅ Audit-logged │ +│ ✅ Ready for deployment │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Component Responsibilities + +**typdialog-ai** (AI-Assisted Forms): +- Real-time form field suggestions based on context +- Natural language form filling +- Validation error explanations in plain English +- Context-aware autocomplete for configuration values +- Integration with typdialog web UI + +**typdialog-ag** (AI Agents): +- Autonomous task execution (multi-step workflows) +- Agent collaboration (multiple agents working together) +- Learning from user feedback and past operations +- Goal-oriented behavior (achieve outcome, not just execute steps) +- Safety boundaries (cannot deploy without approval) + +**typdialog-prov-gen** (Config Generator): +- Natural language → Nickel configuration +- Template-based generation with customization +- Best practice injection (security, performance, HA) +- Iterative refinement based on validation feedback +- Integration with Nickel schema system + +**ai-service** (Core AI Service): +- Central request router for all AI operations +- Authentication and authorization (Cedar policies) +- Rate limiting and cost control +- Caching (reduce LLM API calls) +- Audit logging (all AI operations) +- Multi-provider abstraction (OpenAI, Anthropic, local) + +**mcp-server** (Model Context Protocol): +- LLM integration (OpenAI, Anthropic, local models) +- Tool calling framework (nickel_validate, schema_query, etc.) 
+- Context management (conversation history, schemas) +- Streaming responses for real-time feedback +- Error handling and retries + +**rag** (Retrieval-Augmented Generation): +- Vector store (Qdrant/Milvus) for embeddings +- Document indexing (Nickel schemas, docs, deployments) +- Semantic search (find relevant context) +- Embedding generation (text-embedding-3-large) +- Query expansion and reranking + +## Rationale + +### Why AI Integration Is Essential + +| Aspect | Manual Config | AI-Assisted (chosen) | +|--------|---------------|----------------------| +| **Learning Curve** | 🔴 Steep | 🟢 Gentle | +| **Time to Deploy** | 🔴 Hours | 🟢 Minutes | +| **Error Rate** | 🔴 High | 🟢 Low (validated) | +| **Documentation Access** | 🔴 Separate | 🟢 Contextual | +| **Troubleshooting** | 🔴 Manual | 🟢 AI-assisted | +| **Best Practices** | ⚠️ Manual enforcement | ✅ Auto-injected | +| **Consistency** | ⚠️ Varies by operator | ✅ Standardized | +| **Scalability** | 🔴 Limited by expertise | 🟢 AI scales knowledge | + +### Why Schema-Aware AI Is Critical + +Traditional AI code generation fails for infrastructure because: + +```text +Generic AI (like GitHub Copilot): +❌ Generates syntactically correct but semantically wrong configs +❌ Doesn't understand cloud provider constraints +❌ No validation against schemas +❌ No security policy enforcement +❌ Hallucinated resource names/IDs +``` + +**Schema-aware AI** (our approach): +```nickel +# Nickel schema provides ground truth +{ + Database = { + engine | [| 'postgres, 'mysql, 'mongodb |], + version | String, + storage_gb | Number, + backup_retention_days | Number, + } +} + +# AI generates ONLY valid configs +# AI knows: +# - Valid engine values ('postgres', not 'postgresql') +# - Required fields (all listed above) +# - Type constraints (storage_gb is Number, not String) +# - Nickel contracts (if defined) +``` + +**Result**: AI cannot generate invalid configs. + +### Why RAG (Retrieval-Augmented Generation) Is Essential + +LLMs alone have limitations: + +```text +Pure LLM: +❌ Knowledge cutoff (no recent updates) +❌ Hallucinations (invents plausible-sounding configs) +❌ No project-specific knowledge +❌ No access to past deployments +``` + +**RAG-enhanced LLM**: +```toml +Query: "How to configure Postgres with encryption?" 
+ +RAG retrieves: +- Nickel schema: provisioning/schemas/database.ncl +- Documentation: docs/user/database-encryption.md +- Past deployment: workspaces/prod/postgres-encrypted.ncl +- Best practice: .claude/patterns/secure-database.md + +LLM generates answer WITH retrieved context: +✅ Accurate (based on actual schemas) +✅ Project-specific (uses our patterns) +✅ Proven (learned from past deployments) +✅ Secure (follows our security guidelines) +``` + +### Why Human-in-the-Loop Is Non-Negotiable + +AI-generated infrastructure configs require human approval: + +```rust +// All AI operations require approval +pub async fn ai_generate_config(request: GenerateRequest) -> Result { + let ai_generated = ai_service.generate(request).await?; + + // Validate against Nickel schema + let validation = nickel_validate(&ai_generated)?; + if !validation.is_valid() { + return Err("AI generated invalid config"); + } + + // Check Cedar policies + let authorized = cedar_authorize( + principal: user, + action: "approve_ai_config", + resource: ai_generated, + )?; + if !authorized { + return Err("User not authorized to approve AI config"); + } + + // Require explicit human approval + let approval = prompt_user_approval(&ai_generated).await?; + if !approval.approved { + audit_log("AI config rejected by user", &ai_generated); + return Err("User rejected AI-generated config"); + } + + audit_log("AI config approved by user", &ai_generated); + Ok(ai_generated) +} +``` + +**Why**: +- Infrastructure changes have real-world cost and security impact +- AI can make mistakes (hallucinations, misunderstandings) +- Compliance requires human accountability +- Learning opportunity (human reviews teach AI) + +### Why Multi-Provider Support Matters + +No single LLM provider is best for all tasks: + +| Provider | Best For | Considerations | +|----------|----------|----------------| +| **Anthropic (Claude)** | Long context, accuracy | ✅ Best for complex configs | +| **OpenAI (GPT-4)** | Tool calling, speed | ✅ Best for quick suggestions | +| **Local (Llama, Mistral)** | Privacy, cost | ✅ Best for air-gapped envs | + +**Strategy**: +- Complex config generation → Claude (long context) +- Real-time form suggestions → GPT-4 (fast) +- Air-gapped deployments → Local models (privacy) + +## Consequences + +### Positive + +- **Accessibility**: Non-experts can provision infrastructure +- **Productivity**: 10x faster configuration creation +- **Quality**: AI injects best practices automatically +- **Consistency**: Standardized configurations across teams +- **Learning**: Users learn from AI explanations +- **Troubleshooting**: AI-assisted debugging reduces MTTR +- **Documentation**: Contextual help embedded in workflow +- **Safety**: Schema validation prevents invalid configs +- **Security**: Cedar policies control AI access +- **Auditability**: Complete trail of AI operations + +### Negative + +- **Dependency**: Requires LLM API access (or local models) +- **Cost**: LLM API calls have per-token cost +- **Latency**: AI responses take 1-5 seconds +- **Accuracy**: AI can still make mistakes (needs validation) +- **Trust**: Users must understand AI limitations +- **Complexity**: Additional infrastructure to operate +- **Privacy**: Configs sent to LLM providers (unless local) + +### Mitigation Strategies + +**Cost Control**: +```toml +[ai.rate_limiting] +requests_per_minute = 60 +tokens_per_day = 1000000 +cost_limit_per_day = "100.00" # USD + +[ai.caching] +enabled = true +ttl = "1h" +# Cache similar queries to reduce API calls +``` + +**Latency 
Optimization**: +```rust +// Streaming responses for real-time feedback +pub async fn ai_generate_stream(request: GenerateRequest) -> impl Stream { + ai_service + .generate_stream(request) + .await + .map(|chunk| chunk.text) +} +``` + +**Privacy (Local Models)**: +```toml +[ai] +provider = "local" +model_path = "/opt/provisioning/models/llama-3-70b" + +# No data leaves the network +``` + +**Validation (Defense in Depth)**: +```text +AI generates config + ↓ +Nickel schema validation (syntax, types, contracts) + ↓ +Cedar policy check (security, compliance) + ↓ +Human approval (final gate) + ↓ +Deployment +``` + +**Observability**: +```toml +[ai.observability] +trace_all_requests = true +store_conversations = true +conversation_retention = "30d" + +# Every AI operation logged: +# - Input prompt +# - Retrieved context (RAG) +# - Generated output +# - Validation results +# - Human approval decision +``` + +## Alternatives Considered + +### Alternative 1: No AI Integration + +**Pros**: Simpler, no LLM dependencies +**Cons**: Steep learning curve, slow provisioning, manual troubleshooting +**Decision**: REJECTED - Poor user experience (10x slower provisioning, high error rate) + +### Alternative 2: Generic AI Code Generation (GitHub Copilot approach) + +**Pros**: Existing tools, well-known UX +**Cons**: Not schema-aware, generates invalid configs, no validation +**Decision**: REJECTED - Inadequate for infrastructure (correctness critical) + +### Alternative 3: AI Only for Documentation/Search + +**Pros**: Lower risk (AI doesn't generate configs) +**Cons**: Missed opportunity for 10x productivity gains +**Decision**: REJECTED - Too conservative + +### Alternative 4: Fully Autonomous AI (No Human Approval) + +**Pros**: Maximum automation +**Cons**: Unacceptable risk for infrastructure changes +**Decision**: REJECTED - Safety and compliance requirements + +### Alternative 5: Single LLM Provider Lock-in + +**Pros**: Simpler integration +**Cons**: Vendor lock-in, no flexibility for different use cases +**Decision**: REJECTED - Multi-provider abstraction provides flexibility + +## Implementation Details + +### AI Service API + +```rust +// platform/crates/ai-service/src/lib.rs + +#[async_trait] +pub trait AIService { + async fn generate_config( + &self, + prompt: &str, + schema: &NickelSchema, + context: Option, + ) -> Result; + + async fn suggest_field_value( + &self, + field: &FieldDefinition, + partial_input: &str, + form_context: &FormContext, + ) -> Result>; + + async fn explain_validation_error( + &self, + error: &ValidationError, + config: &Config, + ) -> Result; + + async fn troubleshoot_deployment( + &self, + deployment_id: &str, + logs: &DeploymentLogs, + ) -> Result; +} + +pub struct AIServiceImpl { + mcp_client: MCPClient, + rag: RAGService, + cedar: CedarEngine, + audit: AuditLogger, + rate_limiter: RateLimiter, + cache: Cache, +} + +impl AIService for AIServiceImpl { + async fn generate_config( + &self, + prompt: &str, + schema: &NickelSchema, + context: Option, + ) -> Result { + // Check authorization + self.cedar.authorize( + principal: current_user(), + action: "ai:generate_config", + resource: schema, + )?; + + // Rate limiting + self.rate_limiter.check(current_user()).await?; + + // Retrieve relevant context via RAG + let rag_context = match context { + Some(ctx) => ctx, + None => self.rag.retrieve(prompt, schema).await?, + }; + + // Generate config via MCP + let generated = self.mcp_client.generate( + prompt: prompt, + schema: schema, + context: rag_context, + tools: 
&["nickel_validate", "schema_query"], + ).await?; + + // Validate generated config + let validation = nickel_validate(&generated.config)?; + if !validation.is_valid() { + return Err(AIError::InvalidGeneration(validation.errors)); + } + + // Audit log + self.audit.log(AIOperation::GenerateConfig { + user: current_user(), + prompt: prompt, + schema: schema.name(), + generated: &generated.config, + validation: validation, + }); + + Ok(GeneratedConfig { + config: generated.config, + explanation: generated.explanation, + confidence: generated.confidence, + validation: validation, + }) + } +} +``` + +### MCP Server Integration + +```rust +// platform/crates/mcp-server/src/lib.rs + +pub struct MCPClient { + provider: Box, + tools: ToolRegistry, +} + +#[async_trait] +pub trait LLMProvider { + async fn generate(&self, request: GenerateRequest) -> Result; + async fn generate_stream(&self, request: GenerateRequest) -> Result>; +} + +// Tool definitions for LLM +pub struct ToolRegistry { + tools: HashMap, +} + +impl ToolRegistry { + pub fn new() -> Self { + let mut tools = HashMap::new(); + + tools.insert("nickel_validate", Tool { + name: "nickel_validate", + description: "Validate Nickel configuration against schema", + parameters: json!({ + "type": "object", + "properties": { + "config": {"type": "string"}, + "schema_path": {"type": "string"}, + }, + "required": ["config", "schema_path"], + }), + handler: Box::new(|params| async { + let config = params["config"].as_str().unwrap(); + let schema = params["schema_path"].as_str().unwrap(); + nickel_validate_tool(config, schema).await + }), + }); + + tools.insert("schema_query", Tool { + name: "schema_query", + description: "Query Nickel schema for field information", + parameters: json!({ + "type": "object", + "properties": { + "schema_path": {"type": "string"}, + "query": {"type": "string"}, + }, + "required": ["schema_path"], + }), + handler: Box::new(|params| async { + let schema = params["schema_path"].as_str().unwrap(); + let query = params.get("query").and_then(|v| v.as_str()); + schema_query_tool(schema, query).await + }), + }); + + Self { tools } + } +} +``` + +### RAG System Implementation + +```rust +// platform/crates/rag/src/lib.rs + +pub struct RAGService { + vector_store: Box, + embeddings: EmbeddingModel, + indexer: DocumentIndexer, +} + +impl RAGService { + pub async fn index_all(&self) -> Result<()> { + // Index Nickel schemas + self.index_schemas("provisioning/schemas").await?; + + // Index documentation + self.index_docs("docs").await?; + + // Index past deployments + self.index_deployments("workspaces").await?; + + // Index best practices + self.index_patterns(".claude/patterns").await?; + + Ok(()) + } + + pub async fn retrieve( + &self, + query: &str, + schema: &NickelSchema, + ) -> Result { + // Generate query embedding + let query_embedding = self.embeddings.embed(query).await?; + + // Search vector store + let results = self.vector_store.search( + embedding: query_embedding, + top_k: 10, + filter: Some(json!({ + "schema": schema.name(), + })), + ).await?; + + // Rerank results + let reranked = self.rerank(query, results).await?; + + // Build context + Ok(RAGContext { + query: query.to_string(), + schema_definition: schema.to_string(), + relevant_docs: reranked.iter() + .take(5) + .map(|r| r.content.clone()) + .collect(), + similar_configs: self.find_similar_configs(schema).await?, + best_practices: self.find_best_practices(schema).await?, + }) + } +} + +#[async_trait] +pub trait VectorStore { + async fn insert(&self, id: &str, 
embedding: Vec, metadata: Value) -> Result<()>; + async fn search(&self, embedding: Vec, top_k: usize, filter: Option) -> Result>; +} + +// Qdrant implementation +pub struct QdrantStore { + client: qdrant::QdrantClient, + collection: String, +} +``` + +### typdialog-ai Integration + +```rust +// typdialog-ai/src/form_assistant.rs + +pub struct FormAssistant { + ai_service: Arc, +} + +impl FormAssistant { + pub async fn suggest_field_value( + &self, + field: &FieldDefinition, + partial_input: &str, + form_context: &FormContext, + ) -> Result> { + self.ai_service.suggest_field_value( + field, + partial_input, + form_context, + ).await + } + + pub async fn explain_error( + &self, + error: &ValidationError, + field_value: &str, + ) -> Result { + let explanation = self.ai_service.explain_validation_error( + error, + field_value, + ).await?; + + Ok(format!( + "Error: {}\n\nExplanation: {}\n\nSuggested fix: {}", + error.message, + explanation.plain_english, + explanation.suggested_fix, + )) + } + + pub async fn fill_from_natural_language( + &self, + description: &str, + form_schema: &FormSchema, + ) -> Result> { + let prompt = format!( + "User wants to: {}\n\nForm schema: {}\n\nGenerate field values:", + description, + serde_json::to_string_pretty(form_schema)?, + ); + + let generated = self.ai_service.generate_config( + &prompt, + &form_schema.nickel_schema, + None, + ).await?; + + Ok(generated.field_values) + } +} +``` + +### typdialog-ag Agents + +```rust +// typdialog-ag/src/agent.rs + +pub struct ProvisioningAgent { + ai_service: Arc, + orchestrator: Arc, + max_iterations: usize, +} + +impl ProvisioningAgent { + pub async fn execute_goal(&self, goal: &str) -> Result { + let mut state = AgentState::new(goal); + + for iteration in 0..self.max_iterations { + // AI determines next action + let action = self.ai_service.agent_next_action(&state).await?; + + // Execute action (with human approval for critical operations) + let result = self.execute_action(&action, &state).await?; + + // Update state + state.update(action, result); + + // Check if goal achieved + if state.goal_achieved() { + return Ok(AgentResult::Success(state)); + } + } + + Err(AgentError::MaxIterationsReached) + } + + async fn execute_action( + &self, + action: &AgentAction, + state: &AgentState, + ) -> Result { + match action { + AgentAction::GenerateConfig { description } => { + let config = self.ai_service.generate_config( + description, + &state.target_schema, + Some(state.context.clone()), + ).await?; + + Ok(ActionResult::ConfigGenerated(config)) + }, + + AgentAction::Deploy { config } => { + // Require human approval for deployment + let approval = prompt_user_approval( + "Agent wants to deploy. 
Approve?", + config, + ).await?; + + if !approval.approved { + return Ok(ActionResult::DeploymentRejected); + } + + let deployment = self.orchestrator.deploy(config).await?; + Ok(ActionResult::Deployed(deployment)) + }, + + AgentAction::Troubleshoot { deployment_id } => { + let report = self.ai_service.troubleshoot_deployment( + deployment_id, + &self.orchestrator.get_logs(deployment_id).await?, + ).await?; + + Ok(ActionResult::TroubleshootingReport(report)) + }, + } + } +} +``` + +### Cedar Policies for AI + +```cedar +// AI cannot access secrets without explicit permission +forbid( + principal == Service::"ai-service", + action == Action::"read", + resource in Secret::"*" +); + +// AI can generate configs for non-production environments without approval +permit( + principal == Service::"ai-service", + action == Action::"generate_config", + resource in Schema::"*" +) when { + resource.environment in ["dev", "staging"] +}; + +// AI config generation for production requires senior engineer approval +permit( + principal in Group::"senior-engineers", + action == Action::"approve_ai_config", + resource in Config::"*" +) when { + resource.environment == "production" && + resource.generated_by == "ai-service" +}; + +// AI agents cannot deploy without human approval +forbid( + principal == Service::"ai-agent", + action == Action::"deploy", + resource == Infrastructure::"*" +) unless { + context.human_approved == true +}; +``` + +## Testing Strategy + +**Unit Tests**: +```rust +#[tokio::test] +async fn test_ai_config_generation_validates() { + let ai_service = mock_ai_service(); + + let generated = ai_service.generate_config( + "Create a PostgreSQL database with encryption", + &postgres_schema(), + None, + ).await.unwrap(); + + // Must validate against schema + assert!(generated.validation.is_valid()); + assert_eq!(generated.config["engine"], "postgres"); + assert_eq!(generated.config["encryption_enabled"], true); +} + +#[tokio::test] +async fn test_ai_cannot_access_secrets() { + let ai_service = ai_service_with_cedar(); + + let result = ai_service.get_secret("database/password").await; + + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), AIError::PermissionDenied); +} +``` + +**Integration Tests**: +```rust +#[tokio::test] +async fn test_end_to_end_ai_config_generation() { + // User provides natural language + let description = "Create a production Kubernetes cluster in AWS with 5 nodes"; + + // AI generates config + let generated = ai_service.generate_config(description).await.unwrap(); + + // Nickel validation + let validation = nickel_validate(&generated.config).await.unwrap(); + assert!(validation.is_valid()); + + // Human approval + let approval = Approval { + user: "senior-engineer@example.com", + approved: true, + timestamp: Utc::now(), + }; + + // Deploy + let deployment = orchestrator.deploy_with_approval( + generated.config, + approval, + ).await.unwrap(); + + assert_eq!(deployment.status, DeploymentStatus::Success); +} +``` + +**RAG Quality Tests**: +```rust +#[tokio::test] +async fn test_rag_retrieval_accuracy() { + let rag = rag_service(); + + // Index test documents + rag.index_all().await.unwrap(); + + // Query + let context = rag.retrieve( + "How to configure PostgreSQL with encryption?", + &postgres_schema(), + ).await.unwrap(); + + // Should retrieve relevant docs + assert!(context.relevant_docs.iter().any(|doc| { + doc.contains("encryption") && doc.contains("postgres") + })); + + // Should retrieve similar configs + assert!(!context.similar_configs.is_empty()); +} 
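+
+// Additional sketch (not part of the original ADR): assumes the same mock helpers
+// (`rag_service()`, `postgres_schema()`) and the `RAGContext` shape shown above.
+// Checks that retrieval stays scoped to the schema filter passed to the vector store,
+// which is what keeps downstream generation schema-aware.
+#[tokio::test]
+async fn test_rag_retrieval_scoped_to_schema() {
+    let rag = rag_service();
+    rag.index_all().await.unwrap();
+
+    let context = rag.retrieve(
+        "How to configure PostgreSQL with encryption?",
+        &postgres_schema(),
+    ).await.unwrap();
+
+    // The retrieved context should carry the schema definition it was filtered on
+    assert!(context.schema_definition.contains("Database"));
+    assert_eq!(context.query, "How to configure PostgreSQL with encryption?");
+}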
+``` + +## Security Considerations + +**AI Access Control**: +```bash +AI Service Permissions (enforced by Cedar): +✅ CAN: Read Nickel schemas +✅ CAN: Generate configurations +✅ CAN: Query documentation +✅ CAN: Analyze deployment logs (sanitized) +❌ CANNOT: Access secrets directly +❌ CANNOT: Deploy without approval +❌ CANNOT: Modify Cedar policies +❌ CANNOT: Access user credentials +``` + +**Data Privacy**: +```toml +[ai.privacy] +# Sanitize before sending to LLM +sanitize_secrets = true +sanitize_pii = true +sanitize_credentials = true + +# What gets sent to LLM: +# ✅ Nickel schemas (public) +# ✅ Documentation (public) +# ✅ Error messages (sanitized) +# ❌ Secret values (never) +# ❌ Passwords (never) +# ❌ API keys (never) +``` + +**Audit Trail**: +```rust +// Every AI operation logged +pub struct AIAuditLog { + timestamp: DateTime, + user: UserId, + operation: AIOperation, + input_prompt: String, + generated_output: String, + validation_result: ValidationResult, + human_approval: Option, + deployment_outcome: Option, +} +``` + +## Cost Analysis + +**Estimated Costs** (per month, based on typical usage): + +```text +Assumptions: +- 100 active users +- 10 AI config generations per user per day +- Average prompt: 2000 tokens +- Average response: 1000 tokens + +Provider: Anthropic Claude Sonnet +Cost: $3 per 1M input tokens, $15 per 1M output tokens + +Monthly cost: += 100 users × 10 generations × 30 days × (2000 input + 1000 output tokens) += 100 × 10 × 30 × 3000 tokens += 90M tokens += (60M input × $3/1M) + (30M output × $15/1M) += $180 + $450 += $630/month + +With caching (50% hit rate): += $315/month +``` + +**Cost optimization strategies**: +- Caching (50-80% cost reduction) +- Streaming (lower latency, same cost) +- Local models for non-critical operations (zero marginal cost) +- Rate limiting (prevent runaway costs) + +## References + +- [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) +- [Anthropic Claude API](https://docs.anthropic.com/claude/reference/getting-started) +- [OpenAI GPT-4 API](https://platform.openai.com/docs/api-reference) +- [Qdrant Vector Database](https://qdrant.tech/) +- [RAG Survey Paper](https://arxiv.org/abs/2312.10997) +- ADR-008: Cedar Authorization (AI access control) +- ADR-011: Nickel Migration (schema-driven AI) +- ADR-013: Typdialog Web UI Backend (AI-assisted forms) +- ADR-014: SecretumVault Integration (AI-secret isolation) + +--- + +**Status**: Accepted +**Last Updated**: 2025-01-08 +**Implementation**: Planned (High Priority) +**Estimated Complexity**: Very Complex +**Dependencies**: ADR-008, ADR-011, ADR-013, ADR-014 diff --git a/scripts/ensure-typedialog.sh b/scripts/ensure-typedialog.sh new file mode 100755 index 0000000..bb5fca5 --- /dev/null +++ b/scripts/ensure-typedialog.sh @@ -0,0 +1,222 @@ +#!/usr/bin/env bash +# ensure-typedialog.sh +# Ensures TypeDialog is installed and available in PATH +# Used by setup commands and scripts that depend on TypeDialog +# +# Usage: +# source ensure-typedialog.sh # Load as functions +# ensure_typedialog_installed # Check and install if needed +# ensure_typedialog_component tui # Ensure specific component + +set -euo pipefail + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" +INSTALLER_SCRIPT="${SCRIPT_DIR}/install-typedialog.sh" + +# ============================================================================ +# Helper Functions +# ============================================================================ + +print_info() { + echo -e "${BLUE}ℹ️ $1${NC}" >&2 +} + +print_success() { + echo -e "${GREEN}✅ $1${NC}" >&2 +} + +print_warning() { + echo -e "${YELLOW}⚠️ $1${NC}" >&2 +} + +print_error() { + echo -e "${RED}❌ $1${NC}" >&2 +} + +check_command() { + command -v "$1" &> /dev/null +} + +# ============================================================================ +# TypeDialog Installation Checks +# ============================================================================ + +ensure_typedialog_installed() { + local force=${1:-false} + + if check_command "typedialog"; then + local version=$(typedialog --version 2>/dev/null | head -1) + print_success "TypeDialog available ($version)" + return 0 + fi + + if [[ "$force" == "true" ]]; then + print_warning "TypeDialog not found - installing..." + + if [[ ! -f "$INSTALLER_SCRIPT" ]]; then + print_error "TypeDialog installer not found at: $INSTALLER_SCRIPT" + return 1 + fi + + # Run installer + if bash "$INSTALLER_SCRIPT" --components all --skip-validation 2>&1 | grep -E "(✅|❌|⚠️)" >&2; then + if check_command "typedialog"; then + print_success "TypeDialog installed successfully" + return 0 + else + print_error "Installation completed but typedialog not in PATH" + return 1 + fi + else + print_error "TypeDialog installation failed" + return 1 + fi + else + print_warning "TypeDialog not found (--force to install)" + return 1 + fi +} + +ensure_typedialog_component() { + local component=$1 + local component_binary="typedialog" + + case "$component" in + cli) component_binary="typedialog" ;; + tui) component_binary="typedialog-tui" ;; + web) component_binary="typedialog-web" ;; + ag) component_binary="typedialog-ag" ;; + ai) component_binary="typedialog-ai" ;; + prov-gen) component_binary="typedialog-prov-gen" ;; + *) + print_error "Unknown component: $component" + return 1 + ;; + esac + + if check_command "$component_binary"; then + local version=$("$component_binary" --version 2>/dev/null | head -1 || echo "unknown") + print_success "TypeDialog $component available ($version)" + return 0 + else + print_warning "TypeDialog $component not available" + return 1 + fi +} + +ensure_typedialog_backends() { + local required_backends=${1:-"cli"} # Default to CLI + + print_info "Checking required backends: $required_backends" + + IFS=',' read -ra backends <<< "$required_backends" + + local missing=() + for backend in "${backends[@]}"; do + backend=$(echo "$backend" | xargs) # trim + + if ! ensure_typedialog_component "$backend"; then + missing+=("$backend") + fi + done + + if [[ ${#missing[@]} -gt 0 ]]; then + print_warning "Missing backends: ${missing[*]}" + return 1 + fi + + print_success "All required backends available" + return 0 +} + +# ============================================================================ +# Version Checking +# ============================================================================ + +check_typedialog_version() { + if ! 
check_command "typedialog"; then + print_error "TypeDialog not installed" + return 1 + fi + + local installed=$(typedialog --version 2>/dev/null | head -1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown") + local required=$(grep -A 2 'name = "typedialog"' "${PROJECT_ROOT}/core/versions.ncl" 2>/dev/null | grep 'current = "' | sed 's/.*current = "\([^"]*\)".*/\1/' || echo "unknown") + + echo "$installed" + + if [[ "$installed" == "$required" ]] || [[ "$required" == "unknown" ]]; then + return 0 + fi + + print_warning "Version mismatch: installed=$installed, expected=$required" + return 1 +} + +# ============================================================================ +# Main Entry Point (when run as script) +# ============================================================================ + +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + # Script mode + case "${1:-check}" in + check) + ensure_typedialog_installed false + ;; + install|ensure) + ensure_typedialog_installed true + ;; + component) + ensure_typedialog_component "${2:-cli}" + ;; + backends) + ensure_typedialog_backends "${2:-cli}" + ;; + version) + check_typedialog_version + ;; + help) + cat <&2 +} + +debug() { + if [[ "$VERBOSE" == "true" ]]; then + echo -e "${BLUE}🔍 $1${NC}" + fi +} + +# ============================================================================ +# Platform Detection +# ============================================================================ + +detect_os() { + local os="" + case "$(uname -s)" in + Darwin*) os="macos" ;; + Linux*) os="linux" ;; + MINGW*|MSYS*|CYGWIN*) os="windows" ;; + *) print_error "Unsupported OS: $(uname -s)"; return 1 ;; + esac + echo "$os" +} + +detect_arch() { + local arch="" + case "$(uname -m)" in + x86_64|amd64) arch="x86_64" ;; + aarch64|arm64) arch="aarch64" ;; + *) print_error "Unsupported architecture: $(uname -m)"; return 1 ;; + esac + echo "$arch" +} + +get_binary_name() { + local component=$1 + local os=$2 + + # Map component name to binary name + local binary="typedialog" + case "$component" in + tui) binary="typedialog-tui" ;; + web) binary="typedialog-web" ;; + ag) binary="typedialog-ag" ;; + ai) binary="typedialog-ai" ;; + prov-gen) binary="typedialog-prov-gen" ;; + *) binary="typedialog" ;; + esac + + case "$os" in + macos|linux) + echo "${binary}" + ;; + windows) + echo "${binary}.exe" + ;; + *) + echo "${binary}" + ;; + esac +} + +# ============================================================================ +# Version Management +# ============================================================================ + +extract_version_from_ncl() { + local tool=$1 + + if [[ ! -f "$VERSIONS_FILE" ]]; then + debug "versions.ncl not found at $VERSIONS_FILE" + return 1 + fi + + # Extract version for tool using grep + local version_line=$(grep -A 2 "name = \"$tool\"" "$VERSIONS_FILE" | grep "current =" | head -1) + + if [[ -z "$version_line" ]]; then + return 1 + fi + + # Extract version string (format: current = "X.Y.Z",) + local version=$(echo "$version_line" | sed 's/.*current = "\([^"]*\)".*/\1/') + + if [[ -z "$version" || "$version" == "$version_line" ]]; then + return 1 + fi + + echo "$version" +} + +# ============================================================================ +# Installation Functions +# ============================================================================ + +ensure_install_dir() { + local dir=$1 + + if [[ ! 
-d "$dir" ]]; then + print_info "Creating installation directory: $dir" + mkdir -p "$dir" || { + print_error "Failed to create directory: $dir" + return 1 + } + fi + + # Verify directory is writable + if [[ ! -w "$dir" ]]; then + print_error "Installation directory is not writable: $dir" + return 1 + fi +} + +download_binary() { + local binary=$1 + local version=$2 + local os=$3 + local arch=$4 + local output_path=$5 + + # GitHub release URL pattern + local github_repo="typedialog/typedialog" + local release_url="https://github.com/${github_repo}/releases/download/v${version}" + + # Determine binary name + local binary_name=$(get_binary_name "$binary" "$os") + local download_url="${release_url}/${binary_name}-${os}-${arch}" + + print_info "Downloading: ${binary} (${version}) for ${os}/${arch}" + debug "URL: $download_url" + + # Download with curl + if ! curl -fsSL --progress-bar "$download_url" -o "$output_path"; then + print_error "Failed to download from: $download_url" + return 1 + fi + + # Verify downloaded file + if [[ ! -f "$output_path" ]]; then + print_error "Downloaded file not found: $output_path" + return 1 + fi + + # Check file size (should not be tiny) + local file_size=$(stat -f%z "$output_path" 2>/dev/null || stat -c%s "$output_path" 2>/dev/null || echo 0) + if [[ $file_size -lt 1000000 ]]; then # Less than 1MB = likely error + print_warning "Downloaded file seems too small ($file_size bytes), may be error page" + return 1 + fi + + print_success "Downloaded: $output_path" +} + +install_binary() { + local source=$1 + local dest=$2 + local binary_name=$3 + + # Copy binary + if ! cp "$source" "$dest"; then + print_error "Failed to copy binary from $source to $dest" + return 1 + fi + + # Make executable + if ! chmod +x "$dest"; then + print_error "Failed to make binary executable: $dest" + return 1 + fi + + print_success "Installed: $dest" +} + +validate_installation() { + local binary=$1 + local expected_version=$2 + + # Check if binary exists in PATH + if ! 
command -v "$binary" &> /dev/null; then + print_error "Binary not found in PATH: $binary" + return 1 + fi + + # Get installed version + local installed_version=$("$binary" --version 2>&1 | head -1 | sed 's/.*\s\([0-9.]*\)$/\1/' || echo "") + + if [[ -z "$installed_version" ]]; then + print_warning "Could not determine installed version of $binary" + if [[ "$SKIP_VALIDATION" == "false" ]]; then + return 1 + fi + return 0 + fi + + # Check version match + if [[ "$installed_version" != "$expected_version" ]]; then + print_warning "Version mismatch: expected $expected_version, got $installed_version" + if [[ "$FORCE" != "true" ]]; then + return 1 + fi + fi + + print_success "Validated: $binary ($installed_version)" +} + +# ============================================================================ +# Main Installation Flow +# ============================================================================ + +install_component() { + local component=$1 + local version=$2 + local install_dir=$3 + local os=$4 + local arch=$5 + local component_type=${6:-""} # "backend" or "tool" for logging + + local binary_name=$(get_binary_name "$component" "$os") + local display_name="typedialog" + + if [[ "$component" != "cli" ]]; then + display_name="typedialog-${component}" + fi + + print_info "Installing: ${display_name}${component_type:+ ($component_type)}" + + # Create temp directory for downloads + local temp_dir=$(mktemp -d) + trap "rm -rf $temp_dir" EXIT + + # Download component binary + local temp_binary="${temp_dir}/${binary_name}" + download_binary "$component" "$version" "$os" "$arch" "$temp_binary" || { + print_warning "Failed to download ${display_name} (may not be available for this version/platform)" + return 0 # Don't fail if component unavailable + } + + # Install component binary + install_binary "$temp_binary" "${install_dir}/${binary_name}" "${display_name}" || { + print_warning "Failed to install ${display_name}" + return 0 # Don't fail if installation fails + } + + return 0 +} + +install_components() { + local components=$1 + local version=$2 + local install_dir=$3 + local os=$4 + local arch=$5 + + if [[ -z "$components" || "$components" == "none" ]]; then + print_info "No components specified for installation" + return 0 + fi + + # Expand "all" and "backends"/"tools" aliases + local expanded_components="" + + if [[ "$components" == "all" ]]; then + expanded_components="${ALL_COMPONENTS[*]}" + elif [[ "$components" == "backends" ]]; then + expanded_components="${BACKENDS[*]}" + elif [[ "$components" == "tools" ]]; then + expanded_components="${TOOLS[*]}" + else + # Assume comma-separated list + expanded_components=$(echo "$components" | sed 's/,/ /g') + fi + + print_header "Installing TypeDialog Components" + + local installed_count=0 + local failed_count=0 + + for component in $expanded_components; do + component=$(echo "$component" | xargs) # trim whitespace + + if [[ -z "$component" ]]; then + continue + fi + + # Validate component name + if [[ ! 
" ${ALL_COMPONENTS[*]} " =~ " ${component} " ]]; then + print_warning "Unknown component: $component" + continue + fi + + # Determine component type for logging + local ctype="" + if [[ " ${BACKENDS[*]} " =~ " ${component} " ]]; then + ctype="backend" + elif [[ " ${TOOLS[*]} " =~ " ${component} " ]]; then + ctype="tool" + fi + + if install_component "$component" "$version" "$install_dir" "$os" "$arch" "$ctype"; then + ((installed_count++)) + else + ((failed_count++)) + fi + done + + echo "" + print_success "Installation completed: ${installed_count} component(s) installed" + if [[ $failed_count -gt 0 ]]; then + print_warning "${failed_count} component(s) failed" + fi +} + +validate_all() { + local install_dir=$1 + + print_header "Validating Installation" + + # Validate main binary + validate_installation "typedialog" "$TYPEDIALOG_VERSION" || return 1 + + # Validate installed backends + for backend in tui web; do + if command -v "typedialog-${backend}" &> /dev/null; then + validate_installation "typedialog-${backend}" "$TYPEDIALOG_VERSION" || true + fi + done + + return 0 +} + +# ============================================================================ +# Help and Configuration +# ============================================================================ + +show_help() { + cat <" + print_info " 4. Generate configs: typedialog-prov-gen generate --help" + print_info " 5. Use AI assistant: typedialog-ai serve" + echo "" + print_info "Documentation:" + print_info " - provisioning/.typedialog/platform/README.md" + print_info " - provisioning/.typedialog/provisioning/form.toml" + echo "" + + return 0 +} + +# Run main function +main "$@" diff --git a/scripts/setup-platform-config.sh b/scripts/setup-platform-config.sh index 9ec060c..02d224d 100755 --- a/scripts/setup-platform-config.sh +++ b/scripts/setup-platform-config.sh @@ -15,6 +15,19 @@ set -euo pipefail +# Ensure TypeDialog is installed +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/ensure-typedialog.sh" || true + +# Check TypeDialog availability +if ! command -v typedialog &> /dev/null; then + echo -e "\033[1;33m⚠️ TypeDialog not found. Attempting installation...\033[0m" + ensure_typedialog_installed true || { + echo -e "\033[0;31m❌ Failed to install TypeDialog\033[0m" + exit 1 + } +fi + # Colors for output RED='\033[0;31m' GREEN='\033[0;32m'