# Vapora/.ontology/core.ncl

let d = import "defaults/core.ncl" in

{
  nodes = [

    # ── Axioms (invariant = true) ─────────────────────────────────────────────

    # Axiom: non-blocking I/O everywhere, with Tokio as the only async runtime.
    d.make_node {
      id = "async-first",
      name = "Async-First Architecture",
      level = 'Axiom,
      pole = 'Yang,
      invariant = true,
      description = "All I/O is non-blocking. Tokio is the only async runtime. Sync operations use spawn_blocking. No blocking calls in async context, no sleep-based coordination.",
      artifact_paths = [
        "crates/vapora-backend/src/main.rs",
        "crates/vapora-agents/src/coordinator.rs",
      ],
    },

    # Axiom: every LLM call is budget-checked per role; over-budget calls
    # fall back to cheaper providers or are rejected.
    d.make_node {
      id = "cost-aware-routing",
      name = "Cost-Aware LLM Routing",
      level = 'Axiom,
      pole = 'Yang,
      invariant = true,
      description = "Every LLM call is subject to per-role budget enforcement. Budget limits are non-negotiable — calls that exceed budget fall back to cheaper providers or are rejected. Cost tracking is per provider, task type, and token count.",
      artifact_paths = [
        "crates/vapora-llm-router/src/budget.rs",
        "crates/vapora-llm-router/src/cost_tracker.rs",
        "crates/vapora-llm-router/src/cost_ranker.rs",
      ],
    },

    # Axiom: all data is scoped per workspace/tenant; authorization is
    # expressed as Cedar policies.
    d.make_node {
      id          = "multi-tenant-isolation",
      name        = "Multi-Tenant Isolation",
      pole        = 'Yang,
      level       = 'Axiom,
      description = "All data is scoped per workspace/tenant via SurrealDB scopes. No query may access records outside the authenticated scope. RBAC via Cedar policies.",
      invariant   = true,
      artifact_paths = [
        "crates/vapora-backend/src/services/",
        # The audit module is referenced as a directory module (audit/mod.rs)
        # by the merkle-audit-trail node; a Rust module cannot be backed by
        # both audit.rs and audit/mod.rs, so this path is kept in sync with it.
        "crates/vapora-backend/src/audit/mod.rs",
      ],
    },

    # Axiom: agents are picked from expertise profiles derived from
    # execution history, weighted towards recent results.
    d.make_node {
      id = "learning-based-selection",
      name = "Learning-Based Agent Selection",
      level = 'Axiom,
      pole = 'Yang,
      invariant = true,
      description = "Agent selection is driven by expertise profiles built from execution history with recency bias (7-day window, 3x weight). Scoring: 0.3*load + 0.5*expertise + 0.2*confidence. Cold-start agents receive neutral confidence to prevent overfitting on small samples.",
      artifact_paths = [
        "crates/vapora-agents/src/learning_profile.rs",
        "crates/vapora-agents/src/scoring.rs",
      ],
    },

    # Axiom: agents coordinate only through NATS JetStream messages,
    # never through direct calls.
    d.make_node {
      id = "message-based-coordination",
      name = "Message-Based Agent Coordination",
      level = 'Axiom,
      pole = 'Yang,
      invariant = true,
      description = "Agents never call each other directly. All coordination is via NATS JetStream messages. Backend never calls agent methods directly — only enqueues jobs via AgentCoordinator.",
      artifact_paths = [
        "crates/vapora-agents/src/coordinator.rs",
        "crates/vapora-agents/src/messages.rs",
      ],
    },

    # Axiom: SurrealDB holds all persistent state; in-memory structures
    # are caches only.
    d.make_node {
      id = "surreal-persistence",
      name = "SurrealDB as Single Source of Truth",
      level = 'Axiom,
      pole = 'Yang,
      invariant = true,
      description = "All persistent state lives in SurrealDB. In-memory structures (DashMap, Arc<RwLock>) are caches only. Any data that must survive restart must be written to SurrealDB with parameterized bindings.",
      artifact_paths = [
        "crates/vapora-backend/src/services/",
        "migrations/",
      ],
    },

    # ── Tensions ──────────────────────────────────────────────────────────────

    # Tension: better models cost more; capability is given up before the
    # budget is breached.
    d.make_node {
      id = "cost-vs-capability",
      name = "Cost vs Capability",
      level = 'Tension,
      pole = 'Spiral,
      description = "Higher-capability models produce better results but consume budget faster. Budget enforcement resolves this by degrading gracefully to cheaper providers when limits approach — capability is sacrificed before budget is breached.",
    },

    # Tension: more history sharpens expertise scores but goes stale; the
    # recency window keeps old data while letting recent results dominate.
    d.make_node {
      id          = "learning-convergence-vs-freshness",
      name        = "Learning Convergence vs Freshness",
      pole        = 'Spiral,
      level       = 'Tension,
      description = "More execution history improves expertise scoring accuracy but introduces staleness. The 7-day recency window with 3x bias is the resolution: distant history is not discarded but is outweighed by recent performance.",
    },

    # Tension: agents choose providers freely until budget limits are
    # approached, then are constrained.
    d.make_node {
      id = "agent-autonomy-vs-budget-control",
      name = "Agent Autonomy vs Budget Control",
      level = 'Tension,
      pole = 'Spiral,
      description = "Agents need to select the best provider for a task (autonomy) but operators need guaranteed cost boundaries (control). Three-tier enforcement (normal -> near-threshold -> exceeded) is the resolution: agents operate freely until approaching limits, then are constrained.",
    },

    # Tension: CSR-only Leptos trades initial load performance and SEO for
    # clean WASM isolation; the current resolution is CSR-only.
    d.make_node {
      id = "wasm-isolation-vs-ssr",
      name = "WASM Isolation vs SSR Capability",
      level = 'Tension,
      pole = 'Spiral,
      description = "Leptos CSR-only mode provides clean WASM isolation with no SSR complexity but sacrifices initial load performance and SEO. Current resolution: CSR-only. SSR is not in scope unless Leptos hydration issues are resolved.",
    },

    # ── Practices ─────────────────────────────────────────────────────────────

    # Practice: agent executions are stored as temporal knowledge-graph
    # nodes and searched with a hybrid HNSW + BM25 pipeline.
    d.make_node {
      id = "knowledge-graph-execution-history",
      name = "Knowledge Graph as Execution Memory",
      level = 'Practice,
      pole = 'Yin,
      description = "All agent executions are recorded as temporal nodes in the knowledge graph. Learning curves are computed from daily-windowed aggregations. Similarity search uses a hybrid retrieval pipeline: HNSW (SurrealDB 3 native ANN) + BM25 full-text search fused via Reciprocal Rank Fusion (k=60). Pure HNSW was rejected because it misses exact keyword matches (crate names, error codes); pure BM25 was rejected because it ignores semantic proximity. Migration 012 adds the required HNSW and full-text indexes and fixes a pre-existing SCHEMAFULL schema gap that caused silent deserialization failures.",
      artifact_paths = [
        "crates/vapora-knowledge-graph/src/learning.rs",
        "crates/vapora-knowledge-graph/src/persistence.rs",
        "migrations/012_kg_hybrid_search.surql",
      ],
    },

    # Practice: swarm filters by capability, then scores candidates by
    # success_rate / (1 + load).
    d.make_node {
      id = "swarm-load-balanced-assignment",
      name = "Swarm Load-Balanced Task Assignment",
      level = 'Practice,
      pole = 'Yin,
      description = "Swarm assigns tasks via capability-based filtering then load-balanced scoring: success_rate / (1 + load). NATS JetStream is optional -- swarm degrades gracefully if unavailable.",
      artifact_paths = [
        "crates/vapora-swarm/src/coordinator.rs",
        "crates/vapora-swarm/src/metrics.rs",
      ],
    },

    # Practice: the LLMClient trait is the only route to provider APIs.
    d.make_node {
      id = "provider-abstraction",
      name = "LLMClient Trait for Provider Abstraction",
      level = 'Practice,
      pole = 'Yin,
      description = "All LLM calls go through the LLMClient trait. No crate may call provider APIs directly -- only via the router. This is the enforcement point for cost tracking, fallback chains, and provider substitution.",
      artifact_paths = [
        "crates/vapora-llm-router/src/providers.rs",
      ],
    },

    # Practice: the project describes itself through the ontoref protocol
    # (ontology, ADRs, reflection modes, operational state).
    d.make_node {
      id = "ontoref-protocol-adoption",
      name = "Ontoref Protocol Adoption",
      level = 'Practice,
      pole = 'Yin,
      description = "Vapora uses the ontoref protocol for self-description: ontology, ADRs, reflection modes, and operational state. The ontoref CLI is the single entry point for on+re operations. Supersedes the previous stratumiops-based reflection mode imports.",
      artifact_paths = [".ontology/", "adrs/", "reflection/", ".ontoref/config.ncl"],
    },

    # Practice: workflows are NCL-defined stage DAGs with typed artifacts
    # and approval gates, progressed by NATS JetStream.
    d.make_node {
      id = "workflow-orchestration",
      name = "Multi-Stage Workflow Orchestration",
      level = 'Practice,
      pole = 'Yin,
      description = "Workflows are NCL-defined DAGs of stages with typed artifacts and approval gates. NATS JetStream drives stage progression. Short-lived agent contexts (95% cache token reduction) are enforced per stage. Four built-in templates: feature_development, bugfix, documentation_update, security_audit.",
      artifact_paths = ["crates/vapora-workflow-engine/"],
    },

    # Practice: A2A server and client let external agents interact with
    # vapora agents.
    d.make_node {
      id = "a2a-protocol",
      name = "Agent-to-Agent Protocol",
      level = 'Practice,
      pole = 'Yin,
      description = "A2A server exposes a standardized protocol for external agents and systems to interact with vapora agents. A2A client provides the counterpart library. Both communicate via NATS JetStream and HTTP. Enables federation with external agent ecosystems.",
      artifact_paths = ["crates/vapora-a2a/", "crates/vapora-a2a-client/"],
    },

    # Practice: the MCP server exposes vapora capabilities as MCP tools.
    d.make_node {
      id = "mcp-gateway",
      name = "MCP Protocol Gateway",
      level = 'Practice,
      pole = 'Yin,
      description = "MCP server bridges the Model Context Protocol to vapora's agent runtime. Exposes vapora capabilities as MCP tools consumable by Claude Code and other MCP-aware clients. Plugin mode enables embedding in documentation lifecycle workflows.",
      artifact_paths = ["crates/vapora-mcp-server/"],
    },

    # Practice: every stage execution request is authorized through Cedar
    # policies loaded at startup.
    d.make_node {
      id = "cedar-rbac",
      name = "Cedar Policy Engine for Workflow Authorization",
      level = 'Practice,
      pole = 'Yin,
      description = "CedarAuthorizer in vapora-workflow-engine loads .cedar policy files at startup and evaluates every stage execution request before dispatch. Policies are version-controlled in the repo. No ad-hoc role checks in stage execution code — all authorization decisions go through Cedar.",
      artifact_paths = ["crates/vapora-workflow-engine/src/auth.rs"],
    },

    # Practice: SSRF validation and prompt-injection scanning happen at
    # the API boundary.
    d.make_node {
      id = "security-api-boundary",
      name = "Security Scanning at API Boundary",
      level = 'Practice,
      pole = 'Yin,
      description = "SSRF protection (ssrf.rs) validates all outbound URLs against private/reserved address ranges before dispatch. Prompt injection scanning (prompt_injection.rs) rejects known injection payloads at the API boundary before user input reaches any LLM provider. Security rejections return 400 Bad Request. Channel webhook URLs with SSRF-risky targets are dropped at startup, not registered with a warning.",
      artifact_paths = [
        "crates/vapora-backend/src/security/ssrf.rs",
        "crates/vapora-backend/src/security/prompt_injection.rs",
      ],
    },

    # Practice: webhook notification channels, including alerts when an
    # agent stops sending heartbeats.
    d.make_node {
      id = "notification-channels",
      name = "Notification Channels with Agent-Inactive Alerts",
      level = 'Practice,
      pole = 'Yin,
      description = "ChannelRegistry manages webhook-based notification channels (Slack, generic HTTP) resolved at startup. SSRF validation drops unsafe URLs before registration. NotificationService triggers agent-inactive alerts when no heartbeat is received within the threshold window. Channels are first-class entities stored in SurrealDB; notification config is version-controlled as NCL contracts in nickel/channels/.",
      artifact_paths = [
        "crates/vapora-backend/src/api/channels.rs",
        "nickel/channels/contracts.ncl",
      ],
    },

    # Practice: the capabilities crate ships a registry, a loader and
    # built-in capability implementations.
    d.make_node {
      id = "vapora-capabilities",
      name = "Capability Packages — Zero-Config Agent Bundles",
      level = 'Practice,
      pole = 'Yin,
      description = "vapora-capabilities crate provides CapabilityRegistry, CapabilityLoader, and built-in Capability implementations (CodeReviewer, DocGenerator, PRMonitor). AgentDefinition was relocated to vapora-shared to break the circular dependency that would exist if vapora-capabilities imported vapora-agents. AgentCoordinator gained in-process executor dispatch via DashMap<String, Sender<TaskAssignment>> — the shard lock is released before .await by cloning the Sender out of the map.",
      artifact_paths = [
        "crates/vapora-capabilities/src/lib.rs",
        "crates/vapora-shared/src/agent_definition.rs",
      ],
    },

    # Practice: the role serves as a stable agent identity so learning
    # profiles survive a config reload.
    d.make_node {
      id = "agent-hot-reload-stable-identity",
      name = "Agent Hot-Reload — Stable Role Identity",
      level = 'Practice,
      pole = 'Yin,
      description = "AgentMetadata.stable_id is set to role.clone() at construction — before the role field is moved. All learning_profile keys and KG execution records use stable_id_or_role() instead of the ephemeral UUID id. drain_role + re-spawn sequence implements zero-downtime config reload: learning profiles survive the drain because the DashMap key (role) is unchanged. SIGHUP and POST /reload both invoke reload_agents. BudgetManager and LLMRouter are not reloaded by SIGHUP; process restart is required for those.",
      artifact_paths = [
        "crates/vapora-agents/src/registry.rs",
        "crates/vapora-agents/src/coordinator.rs",
        "crates/vapora-agents/src/server.rs",
      ],
    },

    # Practice: audit entries form a SHA-256 hash chain, so changing one
    # entry invalidates it and every later entry.
    d.make_node {
      id = "merkle-audit-trail",
      name = "Merkle Hash-Chain Audit Trail",
      level = 'Practice,
      pole = 'Yin,
      description = "audit/mod.rs replaces append-only log with a Merkle hash-chain: block_hash = SHA-256(prev_hash|seq|entry_id|timestamp_rfc3339|workflow_id|event_type|actor|details_json). Genesis entry uses GENESIS_HASH (64 zeros). write_lock: Arc<Mutex<()>> serializes append calls within the process. verify_integrity(workflow_id) recomputes every block hash and returns IntegrityReport{valid, total_entries, first_tampered_seq}. Modifying any covered field in entry N invalidates N and every subsequent entry.",
      artifact_paths = [
        "crates/vapora-backend/src/audit/mod.rs",
        "migrations/013_audit_merkle.surql",
      ],
    },

  ],

  edges = [

    # cost-aware-routing -> cost-vs-capability
    d.make_edge {
      kind = 'ManifestsIn,
      from = "cost-aware-routing",
      to = "cost-vs-capability",
      weight = 3,
      note = "Budget enforcement with three-tier degradation is the concrete resolution of the cost/capability tension.",
    },

    # learning-based-selection -> learning-convergence-vs-freshness
    d.make_edge {
      from   = "learning-based-selection",
      to     = "learning-convergence-vs-freshness",
      kind   = 'ManifestsIn,
      weight = 3,
      note   = "7-day recency window with 3x bias resolves convergence vs freshness by outweighing stale data without discarding it.",
    },

    # cost-aware-routing -> agent-autonomy-vs-budget-control
    d.make_edge {
      kind = 'ManifestsIn,
      from = "cost-aware-routing",
      to = "agent-autonomy-vs-budget-control",
      weight = 3,
      note = "Three-tier enforcement (normal/near-threshold/exceeded) allows agent autonomy until budget boundaries are approached.",
    },

    # knowledge-graph-execution-history -> learning-based-selection
    d.make_edge {
      kind = 'ManifestsIn,
      from = "knowledge-graph-execution-history",
      to = "learning-based-selection",
      weight = 3,
      note = "Execution history recorded in the KG feeds the learning profile scorer for agent selection.",
    },

    # message-based-coordination -> async-first
    d.make_edge {
      kind = 'ManifestsIn,
      from = "message-based-coordination",
      to = "async-first",
      weight = 3,
      note = "NATS JetStream is the async coordination primitive -- agents never block on each other.",
    },

    # provider-abstraction -> cost-aware-routing
    d.make_edge {
      kind = 'ManifestsIn,
      from = "provider-abstraction",
      to = "cost-aware-routing",
      weight = 3,
      note = "LLMClient trait is the enforcement point for routing rules and budget checks.",
    },

    # surreal-persistence -> multi-tenant-isolation
    d.make_edge {
      kind = 'ManifestsIn,
      from = "surreal-persistence",
      to = "multi-tenant-isolation",
      weight = 3,
      note = "SurrealDB scopes are the enforcement mechanism for tenant isolation.",
    },

    # workflow-orchestration -> message-based-coordination
    d.make_edge {
      kind = 'ManifestsIn,
      from = "workflow-orchestration",
      to = "message-based-coordination",
      weight = 3,
      note = "Workflow stage progression is driven by NATS JetStream events — no direct inter-stage calls.",
    },

    # a2a-protocol -> message-based-coordination
    d.make_edge {
      kind = 'ManifestsIn,
      from = "a2a-protocol",
      to = "message-based-coordination",
      weight = 2,
      note = "A2A server uses NATS JetStream for async agent communication across the federation boundary.",
    },

    # mcp-gateway -> provider-abstraction
    d.make_edge {
      kind = 'ManifestsIn,
      from = "mcp-gateway",
      to = "provider-abstraction",
      weight = 2,
      note = "MCP gateway routes tool calls through the LLMClient trait, keeping provider substitution possible.",
    },

    # cedar-rbac -> multi-tenant-isolation
    d.make_edge {
      kind = 'ManifestsIn,
      from = "cedar-rbac",
      to = "multi-tenant-isolation",
      weight = 2,
      note = "Cedar policies enforce per-principal authorization constraints on workflow stage execution, complementing SurrealDB scope-based tenant isolation.",
    },

    # security-api-boundary -> multi-tenant-isolation
    d.make_edge {
      kind = 'ManifestsIn,
      from = "security-api-boundary",
      to = "multi-tenant-isolation",
      weight = 2,
      note = "SSRF and prompt injection scanning protect the API surface that tenant data flows through, preventing exfiltration via LLM prompts or misconfigured outbound channels.",
    },

    # notification-channels -> security-api-boundary
    d.make_edge {
      kind = 'ManifestsIn,
      from = "notification-channels",
      to = "security-api-boundary",
      weight = 2,
      note = "Channel webhook URL validation is the first consumer of ssrf.rs; the SSRF validator was introduced to fix the warn-and-register channel bug.",
    },

    # agent-hot-reload-stable-identity -> learning-based-selection
    d.make_edge {
      kind = 'ManifestsIn,
      from = "agent-hot-reload-stable-identity",
      to = "learning-based-selection",
      weight = 3,
      note = "Stable identity (role as stable_id) is the prerequisite for learning profiles surviving restarts — without it, every reload orphaned all accumulated expertise.",
    },

    # vapora-capabilities -> learning-based-selection
    d.make_edge {
      kind = 'ManifestsIn,
      from = "vapora-capabilities",
      to = "learning-based-selection",
      weight = 2,
      note = "Capability bundles provide the agent definitions that the learning scorer operates on — built-in capabilities bootstrap the learning system without manual configuration.",
    },

    # merkle-audit-trail -> multi-tenant-isolation
    d.make_edge {
      kind = 'ManifestsIn,
      from = "merkle-audit-trail",
      to = "multi-tenant-isolation",
      weight = 2,
      note = "Tamper-evident audit log ensures that per-tenant audit records cannot be silently modified post-hoc, satisfying compliance requirements (SOC 2, ISO 27001) for multi-tenant deployments.",
    },

  ],
}