# Vapora/.ontology/manifest.ncl
# Vapora ontoref manifest — machine-readable self-description of the project,
# consumed by ontoref tooling, CI, and agents (see the "ontoref-protocol"
# capability below). `m` supplies the typed constructors (make_manifest,
# make_capability, make_requirement, ...) used throughout this file.
let m = import "defaults/manifest.ncl" in
m.make_manifest {
project = "vapora",
# 'Service is a Nickel enum tag; the set of valid repo kinds is declared by
# the make_manifest contract in defaults/manifest.ncl.
repo_kind = 'Service,
description = "18-crate Rust workspace delivering an intelligent development orchestration platform: specialized AI agents with learning-based selection, cost-aware multi-provider LLM routing, temporal knowledge graph, multi-stage workflow orchestration, Agent-to-Agent protocol, MCP gateway, and a Leptos WASM frontend. Self-hosted on Kubernetes.",
# Capabilities: the platform's headline features. Each entry carries its own
# rationale/how narrative plus pointers into the source tree (`artifacts`)
# and the ontology graph (`nodes`).
capabilities = [
m.make_capability {
id = "agent-orchestration",
name = "Learning-Based Agent Orchestration",
summary = "Assigns tasks to agents using expertise profiles built from execution history with recency bias.",
rationale = "Static round-robin assignment wastes budget on suboptimal providers. Learning profiles route tasks to the agent with the highest demonstrated success rate for that task type, while confidence weighting prevents overfitting on small samples.",
how = "AgentRegistry maintains per-agent state. AgentCoordinator enqueues jobs via NATS JetStream. Swarm scores candidates: 0.3*load + 0.5*expertise + 0.2*confidence. LearningProfile tracks 7-day windowed execution history with 3x recency bias.",
artifacts = [
"crates/vapora-agents/src/learning_profile.rs",
"crates/vapora-agents/src/scoring.rs",
"crates/vapora-agents/src/coordinator.rs",
"crates/vapora-swarm/src/coordinator.rs",
],
nodes = ["learning-based-selection", "swarm-load-balanced-assignment", "knowledge-graph-execution-history"],
},
m.make_capability {
id = "llm-routing",
name = "Cost-Aware Multi-Provider LLM Routing",
summary = "Routes LLM calls to Claude/OpenAI/Gemini/Ollama with per-role budget enforcement and automatic fallback.",
rationale = "Direct provider calls bypass cost tracking and make provider substitution impossible. The LLMClient trait is the single enforcement point for routing rules, budget limits, and fallback chains. Three-tier enforcement (normal/near-threshold/exceeded) allows agent autonomy until budget boundaries are approached.",
how = "LLMRouter selects provider via routing rules + dynamic scoring. CostTracker maintains per-provider token counts. BudgetEnforcer applies monthly/weekly limits per role. Fallback chains defined in llm-router.toml. All calls go through the LLMClient trait — no crate calls provider APIs directly.",
artifacts = [
"crates/vapora-llm-router/src/router.rs",
"crates/vapora-llm-router/src/budget.rs",
"crates/vapora-llm-router/src/cost_tracker.rs",
"crates/vapora-llm-router/src/providers.rs",
],
nodes = ["cost-aware-routing", "provider-abstraction"],
},
m.make_capability {
id = "knowledge-graph",
name = "Temporal Knowledge Graph",
summary = "Records agent execution history as temporal nodes; computes learning curves and recommends solutions via similarity search.",
rationale = "Agents need institutional memory across executions to improve selection accuracy and reuse successful patterns. A temporal graph with causal relationships enables learning curves and similarity-based solution retrieval, which flat logs cannot provide.",
how = "Execution results are persisted as SurrealDB graph nodes. LearningCurve computed from daily-windowed aggregations. SimilaritySearch uses cosine similarity over task embeddings. All KG queries go through the persistence layer — no direct SurrealDB calls from scoring code.",
artifacts = [
"crates/vapora-knowledge-graph/src/learning.rs",
"crates/vapora-knowledge-graph/src/persistence.rs",
],
nodes = ["knowledge-graph-execution-history"],
},
m.make_capability {
id = "workflow-engine",
name = "Multi-Stage Workflow Orchestration",
summary = "Executes multi-stage agent pipelines with typed artifact passing, approval gates, and Kogral context enrichment.",
rationale = "Ad-hoc agent invocations have no visibility into pipeline state, no artifact provenance, and no way to pause for human approval. Typed workflows make each stage's inputs and outputs explicit, enable audit trails, and enforce approval gates before irreversible stages (e.g. deployment).",
how = "WorkflowEngine executes stage DAGs. NATS JetStream drives stage progression. Artifacts (ADR, Code, TestResults, Review, Documentation) are typed and passed between stages. KogralContextProvider enriches agent prompts from .kogral/ filesystem at each stage.",
artifacts = [
"crates/vapora-workflow-engine/",
],
# No ADRs linked to this capability yet (the only entry that sets this field).
adrs = [],
nodes = ["workflow-orchestration"],
},
m.make_capability {
id = "project-management-api",
name = "Project and Task Management REST API",
summary = "40+ Axum endpoints for multi-tenant project/task management with real-time WebSocket updates and audit logging.",
rationale = "Teams need a unified surface for project tracking, agent job dispatch, and deployment visibility. Multi-tenancy via SurrealDB scopes ensures workspace isolation without application-layer filtering.",
how = "Axum router with ~40 handlers across projects, tasks, agents, workflows, swarm, and analytics. SurrealDB services layer handles all persistence. Cedar RBAC enforces fine-grained access. AuditTrail records all state mutations. Prometheus metrics at /metrics.",
artifacts = [
"crates/vapora-backend/src/api/",
"crates/vapora-backend/src/services/",
"crates/vapora-backend/src/audit.rs",
],
nodes = ["multi-tenant-isolation", "surreal-persistence", "async-first"],
},
m.make_capability {
id = "a2a-protocol",
name = "Agent-to-Agent Protocol",
summary = "Standardized A2A server + client for federation with external agent ecosystems.",
rationale = "Vapora agents need to interoperate with Claude Code, external CI agents, and other orchestrators without tight coupling. The A2A protocol provides a stable contract that decouples vapora's internal agent model from external consumers.",
how = "vapora-a2a exposes an A2A server over HTTP + NATS. vapora-a2a-client provides the Rust client library. Messages are typed via the A2A schema. 7 E2E integration tests (require live SurrealDB + NATS; marked #[ignore] in CI without services).",
artifacts = [
"crates/vapora-a2a/src/main.rs",
"crates/vapora-a2a-client/",
],
nodes = ["a2a-protocol", "message-based-coordination"],
},
m.make_capability {
id = "mcp-gateway",
name = "MCP Protocol Gateway",
summary = "MCP server bridging the Model Context Protocol to vapora agent runtime, consumable by Claude Code and other MCP clients.",
rationale = "Claude Code and other AI tools speak MCP. Without a gateway, vapora capabilities are invisible to MCP-aware clients. The gateway exposes vapora as a first-class MCP tool provider without modifying the core agent model.",
how = "vapora-mcp-server implements the MCP protocol server, translating MCP tool calls into vapora AgentCoordinator invocations. Plugin mode enables embedding inside vapora-doc-lifecycle for documentation workflows.",
artifacts = ["crates/vapora-mcp-server/"],
nodes = ["mcp-gateway"],
},
m.make_capability {
id = "ontoref-protocol",
name = "Ontoref Protocol Adoption",
summary = "Vapora is a fully adopted ontoref consumer: typed ontology, ADR lifecycle, reflection modes, API catalog surface.",
rationale = "Self-description via ontoref provides machine-readable architectural context that agents, CI systems, and contributors can query without reading code. The typed ADR lifecycle enforces architectural decision provenance.",
how = "5 .ontology/ files (core, state, gate, manifest, connections) with typed contracts. NCL ADRs with typed constraints and constraint checks. API catalog surface in crates/vapora-backend/src/api/catalog.rs. config_surface declared in manifest.ncl. Git hooks (post-commit, post-merge) notify ontoref daemon of NCL file changes.",
artifacts = [".ontology/", "adrs/", "crates/vapora-backend/src/api/catalog.rs"],
nodes = ["ontoref-protocol-adoption"],
},
m.make_capability {
id = "frontend-ui",
name = "Leptos WASM Frontend",
summary = "Reactive Kanban board and agent management UI with glassmorphism aesthetics, built entirely in Rust/WASM.",
rationale = "Full-stack Rust eliminates the JS/Rust boundary and its associated serialization overhead, security surface, and type mismatch bugs. CSR-only Leptos is chosen over SSR to avoid Leptos hydration complexity at the cost of initial load performance.",
how = "Leptos reactive components in CSR mode compiled to WASM via trunk. UnoCSS for atomic styling. Communicates with vapora-backend via Axum REST + WebSocket for real-time updates.",
artifacts = [
"crates/vapora-frontend/src/pages/",
"crates/vapora-frontend/src/components/",
],
nodes = ["async-first", "wasm-isolation-vs-ssr"],
},
],
# Requirements: tools, services, and env vars needed to build/run the project.
# `env` scopes the requirement ('Development | 'Production | 'Both), `kind`
# distinguishes 'Tool / 'Service / 'EnvVar, and `required = false` marks soft
# dependencies whose degradation path is described in `impact`.
# An empty `version` string means no minimum version is pinned.
requirements = [
m.make_requirement {
id = "rust",
name = "Rust toolchain",
env = 'Development,
kind = 'Tool,
version = "1.75+",
required = true,
impact = "Cannot build any crate. WASM target requires rustup target add wasm32-unknown-unknown.",
provision = "rustup install stable && rustup target add wasm32-unknown-unknown",
},
m.make_requirement {
id = "surrealdb",
name = "SurrealDB",
env = 'Both,
kind = 'Service,
version = "2.3+",
required = true,
impact = "All backend services fail on startup. All persistence, multi-tenancy, and knowledge graph queries unavailable.",
provision = "docker run -d --name surrealdb -p 8000:8000 surrealdb/surrealdb:latest start --bind 0.0.0.0:8000 file://data/database.db",
},
m.make_requirement {
id = "nats",
name = "NATS JetStream",
env = 'Production,
kind = 'Service,
version = "2.x",
# Optional: impact below documents the polling / graceful-fallback path.
required = false,
impact = "Agent coordination degrades to polling. Workflow stage progression unavailable. A2A protocol federation unavailable. Swarm uses graceful fallback.",
provision = "docker run -d --name nats -p 4222:4222 nats:latest -js",
},
m.make_requirement {
id = "trunk",
name = "trunk (WASM bundler)",
env = 'Development,
kind = 'Tool,
version = "",
required = true,
impact = "Cannot build or serve the Leptos WASM frontend.",
provision = "cargo install trunk",
},
m.make_requirement {
id = "nickel",
name = "Nickel",
env = 'Both,
kind = 'Tool,
version = "",
required = true,
impact = "Ontoref ontology export, ADR validation, and config surface introspection unavailable.",
provision = "cargo install nickel-lang-cli or https://nickel-lang.org/user-manual/installation",
},
m.make_requirement {
id = "anthropic-api-key",
name = "ANTHROPIC_API_KEY",
env = 'Both,
kind = 'EnvVar,
version = "",
required = false,
impact = "Claude provider unavailable. LLM router falls back to other configured providers.",
provision = "Set ANTHROPIC_API_KEY=sk-ant-... in shell or K8s secret.",
},
m.make_requirement {
id = "openai-api-key",
name = "OPENAI_API_KEY",
env = 'Both,
kind = 'EnvVar,
version = "",
required = false,
impact = "OpenAI provider unavailable. LLM router falls back to other configured providers.",
provision = "Set OPENAI_API_KEY=sk-... in shell or K8s secret.",
},
],
# Critical crate dependencies: the third-party crates whose breakage would be
# an incident. `failure_impact` states the blast radius; `mitigation` records
# the current containment strategy (version pinning, trait abstraction, etc.).
critical_deps = [
m.make_critical_dep {
id = "surrealdb-crate",
name = "surrealdb",
ref = "crates.io: surrealdb 2.3",
used_for = "All persistence: projects, tasks, agents, knowledge graph, audit trail, multi-tenant scopes.",
failure_impact = "Complete data loss of runtime state. Multi-tenancy enforcement collapses. Knowledge graph and learning curves unavailable. No persistent state survives restart.",
mitigation = "SurrealDB 2.x has stable API. No feature-flag fallback — persistence is non-negotiable. Pin major version in Cargo.toml.",
},
m.make_critical_dep {
id = "async-nats",
name = "async-nats",
ref = "crates.io: async-nats 0.45",
used_for = "Agent coordination (job dispatch, heartbeats), workflow stage progression, A2A protocol federation.",
failure_impact = "Agent coordination degrades to polling. Workflow orchestration unavailable. A2A federation unavailable. Swarm falls back to direct assignment.",
mitigation = "NATS connection is optional for most crates — swarm has graceful fallback. Workflow engine and A2A are blocked without NATS.",
},
m.make_critical_dep {
id = "axum",
name = "axum",
ref = "crates.io: axum 0.8.6",
used_for = "REST API (40+ endpoints), WebSocket real-time updates, Prometheus metrics endpoint.",
failure_impact = "Entire HTTP surface unavailable. Frontend cannot communicate with backend. No agent job submission, no project management, no monitoring.",
mitigation = "Axum 0.8 has stable API surface. No fallback HTTP framework. Router composition is the only external surface — internal services are framework-agnostic.",
},
m.make_critical_dep {
id = "rig-core",
name = "rig-core",
ref = "crates.io: rig-core 0.15",
used_for = "LLM agent framework: tool calling, streaming, provider abstractions for Claude/OpenAI/Gemini.",
failure_impact = "All LLM provider integrations fail. Agent execution unavailable. LLM router cannot dispatch to any provider.",
mitigation = "LLMClient trait abstracts rig-core. In principle substitutable but requires reimplementing provider adapters. Monitor rig-core breaking changes closely.",
},
m.make_critical_dep {
id = "leptos",
name = "leptos",
ref = "crates.io: leptos 0.8.12",
used_for = "WASM frontend: Kanban board, agent management UI, real-time updates.",
failure_impact = "Frontend build fails entirely. No web UI available. Users must fall back to CLI or raw API.",
mitigation = "CSR-only mode minimizes Leptos surface (no hydration, no SSR). Frontend is a separate crate — backend continues operating without it.",
},
],
# Config surface: declares the externalized NCL configuration tree under
# config/ ('NclMerge kind) and, per section, which Rust struct consumes which
# fields — the machine-readable map from config file to code.
config_surface = m.make_config_surface {
config_root = "config/",
entry_point = "config.ncl",
kind = 'NclMerge,
contracts_path = "nickel",
overrides_dir = "config/",
sections = [
m.make_config_section {
id = "server",
file = "server.ncl",
contract = "vapora/contracts.ncl",
description = "Core server settings — host, port, TLS, database URL, NATS connection, JWT auth, logging, metrics.",
rationale = "All platform services share these connection parameters. Centralizing them prevents per-service drift and ensures a single source of truth for database and messaging topology.",
consumers = [
m.make_config_consumer { id = "vapora-backend", kind = 'RustStruct, ref = "vapora_backend::config::Config", fields = ["server", "database", "nats", "auth", "logging", "metrics"] },
],
},
m.make_config_section {
id = "llm-router",
file = "llm-router.ncl",
contract = "llm-router/contracts.ncl",
description = "LLM provider routing — default provider selection, cost tracking, fallback chains, per-provider API keys and model configs.",
rationale = "Routing rules and provider credentials are operator concerns that must be externalized from the binary. NCL validation catches invalid provider combinations and budget inconsistencies before runtime.",
consumers = [
m.make_config_consumer { id = "vapora-llm-router", kind = 'RustStruct, ref = "vapora_llm_router::config::RouterConfig", fields = ["routing", "providers"] },
],
},
m.make_config_section {
id = "agents",
file = "agents.ncl",
contract = "agents/contracts.ncl",
description = "Agent registry and definitions — per-role LLM model selection, capabilities, parallelism, priority.",
rationale = "Agent definitions are operational configuration, not code. Externalizing them allows role reconfiguration (e.g. swapping claude-opus for claude-sonnet on a role) without recompilation.",
consumers = [
m.make_config_consumer { id = "vapora-agents", kind = 'RustStruct, ref = "vapora_agents::config::AgentsConfig", fields = ["registry", "agents"] },
],
},
m.make_config_section {
id = "budgets",
file = "budgets.ncl",
contract = "budgets/contracts.ncl",
description = "Per-role LLM spend limits — monthly/weekly cost caps, alert thresholds, fallback provider on breach.",
rationale = "Budget enforcement without configuration externalisation would require code changes for every limit adjustment. NCL contracts validate that alert_threshold is in [0,1] and fallback_provider is a known provider.",
consumers = [
m.make_config_consumer { id = "vapora-llm-router", kind = 'RustStruct, ref = "vapora_llm_router::budget::BudgetConfig", fields = ["budgets"] },
],
},
m.make_config_section {
id = "workflows",
file = "workflows.ncl",
contract = "workflows/contracts.ncl",
description = "Workflow engine config and workflow definitions — max parallelism, timeout, approval gates, stage DAGs.",
rationale = "Workflow definitions are the primary extension point for adding new orchestration patterns. Keeping them in NCL allows adding workflows without touching Rust and validates stage dependencies before execution.",
consumers = [
m.make_config_consumer { id = "vapora-workflow-engine", kind = 'RustStruct, ref = "vapora_workflow_engine::config::WorkflowConfig", fields = ["engine", "workflows"] },
],
},
m.make_config_section {
id = "channels",
file = "channels.ncl",
contract = "channels/contracts.ncl",
description = "Outbound notification channels — Slack, Telegram, and other destinations with per-event routing.",
rationale = "Channel credentials and event-to-channel mappings change with team structure. NCL contracts validate that referenced channel names resolve and prevent orphaned notification routes.",
# The only section with two consumers: both backend and channels crates read it.
consumers = [
m.make_config_consumer { id = "vapora-backend", kind = 'RustStruct, ref = "vapora_backend::config::Config", fields = ["channels", "notifications"] },
m.make_config_consumer { id = "vapora-channels", kind = 'RustStruct, ref = "vapora_channels::config::ChannelRegistryConfig", fields = ["channels"] },
],
},
],
},
# Consumption modes: who reads this ontology and how deeply they audit it.
# `consumer` and `audit_level` are enum tags from the manifest contract.
consumption_modes = [
m.make_consumption_mode {
consumer = 'Developer,
needs = ['OntologyExport],
audit_level = 'Standard,
description = "Clones repo, runs cargo build/test. Uses ontoref CLI to query ontology, run reflection modes, and track ADRs.",
},
m.make_consumption_mode {
consumer = 'Agent,
needs = ['OntologyExport, 'JsonSchema],
audit_level = 'Quick,
description = "Reads .ontology/core.ncl via nickel export. Checks axioms and gates before acting. Uses reflection modes for task dispatch and deployment.",
},
],
# Layers: path groupings that operational_modes (below) toggle visibility of.
# `committed = true` layers live in this repository's history; `false` marks
# local-only or external material.
layers = [
m.make_layer {
id = "implementation",
paths = [".ontology/", "adrs/", "reflection/"],
committed = true,
description = "Vapora self-description: ontology nodes/edges, ADRs, and reflection modes.",
},
m.make_layer {
id = "crates",
paths = ["crates/", "Cargo.toml", "Cargo.lock"],
committed = true,
# Crate count aligned with the top-level project description ("18-crate");
# this previously said "17-crate". TODO(review): verify the real count
# against [workspace.members] in Cargo.toml and keep both strings in sync.
description = "18-crate Rust workspace: backend, agents, llm-router, swarm, knowledge-graph, frontend, and supporting crates.",
},
m.make_layer {
id = "infrastructure",
paths = ["kubernetes/", "provisioning/", "migrations/", "docker-compose.yml"],
committed = true,
description = "Deployment manifests, KCL provisioning, SurrealDB migrations, and Docker Compose.",
},
m.make_layer {
id = "ontoref-framework",
# NOTE(review): absolute, machine-specific path — resolves only on this
# one workstation. Consider an env-var indirection or a documented
# relative checkout location so other hosts/CI can use this layer.
paths = ["/Users/Akasha/Development/ontoref/.ontology/"],
committed = false,
description = "Ontoref framework ontology — protocol axioms, practices, and patterns visible in ontoref-browse mode.",
},
m.make_layer {
id = "process",
paths = [".coder/"],
committed = false,
description = "Session artifacts: plans, investigations, summaries. Process memory for actors.",
},
],
# Operational modes: named views over the layers above. Every entry in
# `visible_layers` must match a layer `id`; `audit_level` is an enum tag
# from the manifest contract ('Standard, 'Quick).
operational_modes = [
m.make_op_mode {
id = "dev",
description = "Standard development mode — vapora implementation layer only.",
visible_layers = ["implementation", "crates", "infrastructure"],
audit_level = 'Standard,
},
m.make_op_mode {
id = "ontoref-browse",
description = "Browse ontoref framework capabilities and patterns available to vapora.",
visible_layers = ["implementation", "ontoref-framework"],
audit_level = 'Quick,
},
],
}