From df829421d8baedb00aa092928d66b353e8b2fb48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jesu=CC=81s=20Pe=CC=81rez?= Date: Mon, 16 Feb 2026 05:12:22 +0000 Subject: [PATCH] chore: udate docs, add architecture diagrams --- assets/vapora_architecture.svg | 576 ++++++++++++++++++ assets/vapora_architecture_white.svg | 576 ++++++++++++++++++ assets/web/architecture-diagram.html | 1 + assets/web/src/architecture-diagram.html | 250 ++++++++ assets/web/src/vapora_architecture.svg | 576 ++++++++++++++++++ assets/web/src/vapora_architecture_white.svg | 576 ++++++++++++++++++ assets/web/src/vapora_white.svg | 119 ++++ assets/web/vapora_architecture.svg | 576 ++++++++++++++++++ assets/web/vapora_architecture_white.svg | 576 ++++++++++++++++++ assets/web/vapora_white.svg | 119 ++++ ...8-recursive-language-models-integration.md | 402 ++++++++++++ docs/guides/rlm-usage-guide.md | 540 ++++++++++++++++ migrations/008_rlm_schema.surql | 30 + 13 files changed, 4917 insertions(+) create mode 100644 assets/vapora_architecture.svg create mode 100644 assets/vapora_architecture_white.svg create mode 100644 assets/web/architecture-diagram.html create mode 100644 assets/web/src/architecture-diagram.html create mode 100644 assets/web/src/vapora_architecture.svg create mode 100644 assets/web/src/vapora_architecture_white.svg create mode 100644 assets/web/src/vapora_white.svg create mode 100644 assets/web/vapora_architecture.svg create mode 100644 assets/web/vapora_architecture_white.svg create mode 100644 assets/web/vapora_white.svg create mode 100644 docs/architecture/decisions/008-recursive-language-models-integration.md create mode 100644 docs/guides/rlm-usage-guide.md create mode 100644 migrations/008_rlm_schema.surql diff --git a/assets/vapora_architecture.svg b/assets/vapora_architecture.svg new file mode 100644 index 0000000..024d66f --- /dev/null +++ b/assets/vapora_architecture.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/vapora_architecture_white.svg b/assets/vapora_architecture_white.svg new file mode 100644 index 0000000..2dd47d8 --- /dev/null +++ b/assets/vapora_architecture_white.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/web/architecture-diagram.html b/assets/web/architecture-diagram.html new file mode 100644 index 0000000..bc83a04 --- /dev/null +++ b/assets/web/architecture-diagram.html @@ -0,0 +1 @@ +VAPORA — Architecture
VAPORA Architecture - Dark Mode
diff --git a/assets/web/src/architecture-diagram.html b/assets/web/src/architecture-diagram.html new file mode 100644 index 0000000..adf4d65 --- /dev/null +++ b/assets/web/src/architecture-diagram.html @@ -0,0 +1,250 @@ + + + + + + VAPORA — Architecture + + + + + +
+ VAPORA Architecture - Dark Mode + +
+ + + + diff --git a/assets/web/src/vapora_architecture.svg b/assets/web/src/vapora_architecture.svg new file mode 100644 index 0000000..024d66f --- /dev/null +++ b/assets/web/src/vapora_architecture.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/web/src/vapora_architecture_white.svg b/assets/web/src/vapora_architecture_white.svg new file mode 100644 index 0000000..2dd47d8 --- /dev/null +++ b/assets/web/src/vapora_architecture_white.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/web/src/vapora_white.svg b/assets/web/src/vapora_white.svg new file mode 100644 index 0000000..78bcfee --- /dev/null +++ b/assets/web/src/vapora_white.svg @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA + + + + VAPORA + + + + + + VAPORA + + + + + Evaporate complexity + + + + + + + + + + diff --git a/assets/web/vapora_architecture.svg b/assets/web/vapora_architecture.svg new file mode 100644 index 0000000..024d66f --- /dev/null +++ b/assets/web/vapora_architecture.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/web/vapora_architecture_white.svg b/assets/web/vapora_architecture_white.svg new file mode 100644 index 0000000..2dd47d8 --- /dev/null +++ b/assets/web/vapora_architecture_white.svg @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA ARCHITECTURE + 18 CRATES · 354 TESTS · 100% RUST + + + PRESENTATION + SERVICES + INTELLIGENCE + DATA + PROVIDERS + + + + + + + + + + + Leptos WASM Frontend + Kanban Board · Glassmorphism UI · UnoCSS · Reactive Components + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Axum Backend API + 40+ REST Endpoints · 161 Tests + + + Projects + Tasks + Agents + Workflows + Proposals + Swarm + Metrics + RLM API + WebSocket + + + + + + + + Agent Runtime + Orchestration · Learning Profiles · 71 Tests + + Registry + Coordinator + Scoring + Profiles + 12 Roles + Health Checks + Recency Bias + + + + + + + + + + + + + MCP Gateway + Model Context Protocol · Plugin System + + Stdio Transport + SSE Transport + JSON-RPC + 6 Tools + Tool Registry + Schema Validation + + + + + + + A2A Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RLM Engine + Recursive Language Models · 38 Tests · 17k+ LOC + + + + + + Chunking + + + Hybrid Search + + + Dispatcher + + + + + BM25 + + + Semantic + + + RRF + + + Sandbox + + + + + + + + + + + + + + + + + + + Multi-IA LLM Router + Budget Enforcement · Cost Tracking · 53 Tests + + + + + Rule Router + + Budget Manager + + Cost Tracker + + + + Fallback Chain + + Cost Ranker + + Providers + + + + + + + + + + + + + + + + + + Swarm Coordinator + Load Balancing · Prometheus · 6 Tests + + + + Assignment + + Filtering + + Metrics + + + + success_rate / (1+load) + + Capabilities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Knowledge Graph + Temporal History · Learning Curves · 20 Tests + + + Executions + + Similarity + + Causal + + + + + + + + SurrealDB + Multi-Model Database · Multi-Tenant Scopes + + + + Projects + + Tasks + + Agents + + Chunks + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + NATS JetStream + Message Queue · Async Coordination · Pub/Sub + + + + Agent Jobs + + Workflows + + Proposals + + A2A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anthropic Claude + Opus · Sonnet · Haiku + + + + + + OpenAI + GPT-4 · GPT-4o · GPT-3.5 + + + + + + Google Gemini + 2.0 Pro · Flash · 1.5 Pro + + + + + + Ollama (Local) + Llama · Mistral · CodeLlama + + + + + + + + + SUPPORTING CRATES + + vapora-shared + vapora-tracking + vapora-telemetry + vapora-analytics + vapora-worktree + vapora-doc-lifecycle + vapora-workflow-engine + vapora-cli + vapora-leptos-ui + + + + + + + + + + + + PROMETHEUS · GRAFANA · OPENTELEMETRY + + + + + + + VAPORA v1.2.0 + Evaporate complexity · Full-Stack Rust · Production Ready + + + + + diff --git a/assets/web/vapora_white.svg b/assets/web/vapora_white.svg new file mode 100644 index 0000000..78bcfee --- /dev/null +++ b/assets/web/vapora_white.svg @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VAPORA + + + + VAPORA + + + + + + VAPORA + + + + + Evaporate complexity + + + + + + + + + + diff --git a/docs/architecture/decisions/008-recursive-language-models-integration.md b/docs/architecture/decisions/008-recursive-language-models-integration.md new file mode 100644 index 0000000..75e5aad --- /dev/null +++ b/docs/architecture/decisions/008-recursive-language-models-integration.md @@ -0,0 +1,402 @@ +# ADR-008: Recursive Language Models (RLM) Integration + +**Date**: 2026-02-16 +**Status**: Accepted +**Deciders**: VAPORA Team +**Technical Story**: Phase 9 - RLM as Core Foundation + +## Context and Problem Statement + +VAPORA's agent system relied on **direct LLM calls** for all reasoning tasks, which created fundamental limitations: + +1. **Context window limitations**: Single LLM calls fail beyond 50-100k tokens (context rot) +2. **No knowledge reuse**: Historical executions were not semantically searchable +3. **Single-shot reasoning**: No distributed analysis across document chunks +4. **Cost inefficiency**: Processing entire documents repeatedly instead of relevant chunks +5. **No incremental learning**: Agents couldn't learn from past successful solutions + +**Question**: How do we enable long-context reasoning, knowledge reuse, and distributed LLM processing in VAPORA? + +## Decision Drivers + +**Must Have:** +- Handle documents >100k tokens without context rot +- Semantic search over historical executions +- Distributed reasoning across document chunks +- Integration with existing SurrealDB + NATS architecture +- Support multiple LLM providers (OpenAI, Claude, Ollama) + +**Should Have:** +- Hybrid search (keyword + semantic) +- Cost tracking per provider +- Prometheus metrics +- Sandboxed execution environment + +**Nice to Have:** +- WASM-based fast execution tier +- Docker warm pool for complex tasks + +## Considered Options + +### Option 1: RAG (Retrieval-Augmented Generation) Only + +**Approach**: Traditional RAG with vector embeddings + SurrealDB + +**Pros:** +- Simple to implement +- Well-understood pattern +- Good for basic Q&A + +**Cons:** +- ❌ No distributed reasoning (single LLM call) +- ❌ Keyword search limitations (only semantic) +- ❌ No execution sandbox +- ❌ Limited to simple retrieval tasks + +### Option 2: LangChain/LlamaIndex Integration + +**Approach**: Use existing framework (LangChain or LlamaIndex) + +**Pros:** +- Pre-built components +- Active community +- Many integrations + +**Cons:** +- ❌ Python-based (VAPORA is Rust-first) +- ❌ Heavy dependencies +- ❌ Less control over implementation +- ❌ Tight coupling to framework abstractions + +### Option 3: Recursive Language Models (RLM) - **SELECTED** + +**Approach**: Custom Rust implementation with distributed reasoning, hybrid search, and sandboxed execution + +**Pros:** +- ✅ Native Rust (zero-cost abstractions, safety) +- ✅ Hybrid search (BM25 + semantic + RRF fusion) +- ✅ Distributed LLM calls across chunks +- ✅ Sandboxed execution (WASM + Docker) +- ✅ Full control over implementation +- ✅ Reuses existing VAPORA patterns (SurrealDB, NATS, Prometheus) + +**Cons:** +- ⚠️ More initial implementation effort +- ⚠️ Maintaining custom codebase + +**Decision**: **Option 3 - RLM Custom Implementation** + +## Decision Outcome + +### Chosen Solution: Recursive Language Models (RLM) + +Implement a **native Rust RLM system** as a foundational VAPORA component, providing: + +1. **Chunking**: Fixed, Semantic, Code-aware strategies +2. **Hybrid Search**: BM25 (Tantivy) + Semantic (embeddings) + RRF fusion +3. **Distributed Reasoning**: Parallel LLM calls across relevant chunks +4. **Sandboxed Execution**: WASM tier (<10ms) + Docker tier (80-150ms) +5. **Knowledge Graph**: Store execution history with learning curves +6. **Multi-Provider**: OpenAI, Claude, Gemini, Ollama support + +### Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ RLM Engine │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Chunking │ │ Hybrid Search│ │ Dispatcher │ │ +│ │ │ │ │ │ │ │ +│ │ • Fixed │ │ • BM25 │ │ • Parallel │ │ +│ │ • Semantic │ │ • Semantic │ │ LLM calls │ │ +│ │ • Code │ │ • RRF Fusion │ │ • Aggregation│ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Storage │ │ Sandbox │ │ Metrics │ │ +│ │ │ │ │ │ │ │ +│ │ • SurrealDB │ │ • WASM │ │ • Prometheus │ │ +│ │ • Chunks │ │ • Docker │ │ • Costs │ │ +│ │ • Buffers │ │ • Auto-tier │ │ • Latency │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Implementation Details + +**Crate**: `vapora-rlm` (17,000+ LOC) + +**Key Components:** + +```rust +// 1. Chunking +pub enum ChunkingStrategy { + Fixed, // Fixed-size chunks with overlap + Semantic, // Unicode-aware, sentence boundaries + Code, // AST-based (Rust, Python, JS) +} + +// 2. Hybrid Search +pub struct HybridSearch { + bm25_index: Arc, // Tantivy in-memory + storage: Arc, // SurrealDB + config: HybridSearchConfig, // RRF weights +} + +// 3. LLM Dispatch +pub struct LLMDispatcher { + client: Option>, // Multi-provider + config: DispatchConfig, // Aggregation strategy +} + +// 4. Sandbox +pub enum SandboxTier { + WASM, // <10ms, WASI-compatible commands + Docker, // <150ms, full compatibility +} +``` + +**Database Schema** (SCHEMALESS for flexibility): + +```sql +-- Chunks (from documents) +DEFINE TABLE rlm_chunks SCHEMALESS; +DEFINE INDEX idx_rlm_chunks_chunk_id ON TABLE rlm_chunks COLUMNS chunk_id UNIQUE; +DEFINE INDEX idx_rlm_chunks_doc_id ON TABLE rlm_chunks COLUMNS doc_id; + +-- Execution History (for learning) +DEFINE TABLE rlm_executions SCHEMALESS; +DEFINE INDEX idx_rlm_executions_execution_id ON TABLE rlm_executions COLUMNS execution_id UNIQUE; +DEFINE INDEX idx_rlm_executions_doc_id ON TABLE rlm_executions COLUMNS doc_id; +``` + +**Key Decision**: Use **SCHEMALESS** instead of SCHEMAFULL tables to avoid conflicts with SurrealDB's auto-generated `id` fields. + +### Production Usage + +```rust +use vapora_rlm::{RLMEngine, ChunkingConfig, EmbeddingConfig}; +use vapora_llm_router::providers::OpenAIClient; + +// Setup LLM client +let llm_client = Arc::new(OpenAIClient::new( + api_key, "gpt-4".to_string(), + 4096, 0.7, 5.0, 15.0 +)?); + +// Configure RLM +let config = RLMEngineConfig { + chunking: ChunkingConfig { + strategy: ChunkingStrategy::Semantic, + chunk_size: 1000, + overlap: 200, + }, + embedding: Some(EmbeddingConfig::openai_small()), + auto_rebuild_bm25: true, + max_chunks_per_doc: 10_000, +}; + +// Create engine +let engine = RLMEngine::with_llm_client( + storage, bm25_index, llm_client, Some(config) +)?; + +// Usage +let chunks = engine.load_document(doc_id, content, None).await?; +let results = engine.query(doc_id, "error handling", None, 5).await?; +let response = engine.dispatch_subtask(doc_id, "Analyze code", None, 5).await?; +``` + +## Consequences + +### Positive + +**Performance:** +- ✅ Handles 100k+ line documents without context rot +- ✅ Query latency: ~90ms average (100 queries benchmark) +- ✅ WASM tier: <10ms for simple commands +- ✅ Docker tier: <150ms from warm pool +- ✅ Full workflow: <30s for 10k lines (2728 chunks) + +**Functionality:** +- ✅ Hybrid search outperforms pure semantic or BM25 alone +- ✅ Distributed reasoning reduces hallucinations +- ✅ Knowledge Graph enables learning from past executions +- ✅ Multi-provider support (OpenAI, Claude, Ollama) + +**Quality:** +- ✅ 38/38 tests passing (100% pass rate) +- ✅ 0 clippy warnings +- ✅ Comprehensive E2E, performance, security tests +- ✅ Production-ready with real persistence (no stubs) + +**Cost Efficiency:** +- ✅ Chunk-based processing reduces token usage +- ✅ Cost tracking per provider and task +- ✅ Local Ollama option for development (free) + +### Negative + +**Complexity:** +- ⚠️ Additional component to maintain (17k+ LOC) +- ⚠️ Learning curve for distributed reasoning patterns +- ⚠️ More moving parts (chunking, BM25, embeddings, dispatch) + +**Infrastructure:** +- ⚠️ Requires SurrealDB for persistence +- ⚠️ Requires embedding provider (OpenAI/Ollama) +- ⚠️ Optional Docker for full sandbox tier + +**Performance Trade-offs:** +- ⚠️ Load time ~22s for 10k lines (chunking + embedding + indexing) +- ⚠️ BM25 rebuild time proportional to document size +- ⚠️ Memory usage: ~25MB per WASM instance, ~100-300MB per Docker container + +### Risks and Mitigations + +| Risk | Mitigation | Status | +|------|-----------|--------| +| SurrealDB schema conflicts | Use SCHEMALESS tables | ✅ Resolved | +| BM25 index performance | In-memory Tantivy, auto-rebuild | ✅ Verified | +| LLM provider costs | Cost tracking, local Ollama option | ✅ Implemented | +| Sandbox escape | WASM isolation, Docker security tests | ✅ 13/13 tests passing | +| Context window limits | Chunking + hybrid search + aggregation | ✅ Handles 100k+ tokens | + +## Validation + +### Test Coverage + +``` +Basic integration: 4/4 ✅ (100%) +E2E integration: 9/9 ✅ (100%) +Security: 13/13 ✅ (100%) +Performance: 8/8 ✅ (100%) +Debug tests: 4/4 ✅ (100%) +─────────────────────────────────── +Total: 38/38 ✅ (100%) +``` + +### Performance Benchmarks + +``` +Query Latency (100 queries): + Average: 90.6ms + P50: 87.5ms + P95: 88.3ms + P99: 91.7ms + +Large Document (10k lines): + Load: ~22s (2728 chunks) + Query: ~565ms + Full workflow: <30s + +BM25 Index: + Build time: ~100ms for 1000 docs + Search: <1ms for most queries +``` + +### Integration Points + +**Existing VAPORA Components:** +- ✅ `vapora-llm-router`: LLM client integration +- ✅ `vapora-knowledge-graph`: Execution history persistence +- ✅ `vapora-shared`: Common error types and models +- ✅ SurrealDB: Persistent storage backend +- ✅ Prometheus: Metrics export + +**New Integration Surface:** +```rust +// Backend API +POST /api/v1/rlm/analyze +{ + "content": "...", + "query": "...", + "strategy": "semantic" +} + +// Agent Coordinator +let rlm_result = rlm_engine.dispatch_subtask( + doc_id, task.description, None, 5 +).await?; +``` + +## Related Decisions + +- **ADR-003**: Multi-provider LLM routing (Phase 6 dependency) +- **ADR-005**: Knowledge Graph temporal modeling (RLM execution history) +- **ADR-006**: Prometheus metrics standardization (RLM metrics) + +## References + +**Implementation:** +- `crates/vapora-rlm/` - Full RLM implementation +- `crates/vapora-rlm/PRODUCTION.md` - Production setup guide +- `crates/vapora-rlm/examples/` - Working examples +- `migrations/008_rlm_schema.surql` - Database schema + +**External:** +- [Tantivy](https://github.com/quickwit-oss/tantivy) - BM25 full-text search +- [RRF Paper](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) - Reciprocal Rank Fusion +- [WASM Security Model](https://webassembly.org/docs/security/) + +**Tests:** +- `tests/e2e_integration.rs` - End-to-end workflow tests +- `tests/performance_test.rs` - Performance benchmarks +- `tests/security_test.rs` - Sandbox security validation + +## Notes + +**Why SCHEMALESS vs SCHEMAFULL?** + +Initial implementation used SCHEMAFULL with explicit `id` field definitions: +```sql +DEFINE TABLE rlm_chunks SCHEMAFULL; +DEFINE FIELD id ON TABLE rlm_chunks TYPE record; -- ❌ Conflict +``` + +This caused data persistence failures because SurrealDB auto-generates `id` fields. Changed to SCHEMALESS: +```sql +DEFINE TABLE rlm_chunks SCHEMALESS; -- ✅ Works +DEFINE INDEX idx_rlm_chunks_chunk_id ON TABLE rlm_chunks COLUMNS chunk_id UNIQUE; +``` + +Indexes still work with SCHEMALESS, providing necessary performance without schema conflicts. + +**Why Hybrid Search?** + +Pure BM25 (keyword): +- ✅ Fast, exact matches +- ❌ Misses semantic similarity + +Pure Semantic (embeddings): +- ✅ Understands meaning +- ❌ Expensive, misses exact keywords + +Hybrid (BM25 + Semantic + RRF): +- ✅ Best of both worlds +- ✅ Reciprocal Rank Fusion combines rankings optimally +- ✅ Empirically outperforms either alone + +**Why Custom Implementation vs Framework?** + +Frameworks (LangChain, LlamaIndex): +- Python-based (VAPORA is Rust) +- Heavy abstractions +- Less control +- Dependency lock-in + +Custom Rust RLM: +- Native performance +- Full control +- Zero-cost abstractions +- Direct integration with VAPORA patterns + +**Trade-off accepted**: More initial effort for long-term maintainability and performance. + +--- + +**Supersedes**: None (new decision) +**Amended by**: None +**Last Updated**: 2026-02-16 diff --git a/docs/guides/rlm-usage-guide.md b/docs/guides/rlm-usage-guide.md new file mode 100644 index 0000000..e48efd8 --- /dev/null +++ b/docs/guides/rlm-usage-guide.md @@ -0,0 +1,540 @@ +# RLM Usage Guide - Recursive Language Models + +Guide completa para usar y adaptar el sistema RLM en VAPORA. + +## Tabla de Contenidos + +- [Introducción](#introducción) +- [Conceptos Básicos](#conceptos-básicos) +- [Configuración](#configuración) +- [Casos de Uso](#casos-de-uso) +- [Adaptación](#adaptación) +- [Troubleshooting](#troubleshooting) + +## Introducción + +### ¿Qué es RLM? + +RLM (Recursive Language Models) es un sistema de razonamiento distribuido que permite: + +- **Procesar documentos grandes** (>100k tokens) sin limitaciones de contexto +- **Búsqueda híbrida** (keywords + semántica) con RRF fusion +- **Razonamiento distribuido** mediante llamadas LLM paralelas +- **Aprendizaje incremental** desde historial de ejecuciones + +### ¿Cuándo usar RLM? + +**Usa RLM cuando:** +- ✅ Documentos >50k tokens +- ✅ Necesitas búsqueda semántica + keywords +- ✅ Razonamiento sobre múltiples partes del documento +- ✅ Reutilizar soluciones de ejecuciones pasadas + +**No uses RLM cuando:** +- ❌ Documentos pequeños (<10k tokens) - usa LLM directo +- ❌ Solo necesitas chat simple +- ❌ Latencia crítica (<100ms) - usa cache/embeddings directos + +## Conceptos Básicos + +### 1. Chunking (Fragmentación) + +Divide documentos en chunks (fragmentos) manejables: + +```rust +use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy}; + +// Fixed: Tamaño fijo con overlap +let config = ChunkingConfig { + strategy: ChunkingStrategy::Fixed, + chunk_size: 1000, // 1000 caracteres por chunk + overlap: 200, // 200 caracteres de overlap +}; + +// Semantic: Respeta límites de oraciones +let config = ChunkingConfig { + strategy: ChunkingStrategy::Semantic, + chunk_size: 1000, + overlap: 200, +}; + +// Code: Usa AST para código fuente +let config = ChunkingConfig { + strategy: ChunkingStrategy::Code, + chunk_size: 1500, + overlap: 300, +}; +``` + +**Guía de selección:** + +| Tipo de contenido | Estrategia recomendada | Chunk size | +|-------------------|------------------------|------------| +| Prosa, docs | Semantic | 1000-2000 | +| Código fuente | Code | 1500-3000 | +| Logs, datos | Fixed | 500-1000 | +| Mixto | Semantic | 1000-1500 | + +### 2. Hybrid Search (Búsqueda Híbrida) + +Combina BM25 (keywords) + embeddings (semántica) + RRF: + +```rust +// Automático - RLM maneja la búsqueda híbrida +let results = engine.query( + doc_id, // ID del documento + "error handling", // Query (keywords) + None, // Embedding opcional + 5, // Top 5 resultados +).await?; + +// Resultados incluyen scores híbridos +for result in results { + println!("Score: {}", result.score); // Score RRF combinado + println!("BM25: {:?}", result.bm25_score); + println!("Semantic: {:?}", result.semantic_score); + println!("Content: {}", result.chunk.content); +} +``` + +**Scores:** +- `score`: Score RRF final (mayor = mejor) +- `bm25_score`: Score keyword (TF-IDF based) +- `semantic_score`: Score semántico (cosine similarity) + +### 3. LLM Dispatch (Razonamiento Distribuido) + +Envía chunks relevantes al LLM para análisis: + +```rust +let response = engine.dispatch_subtask( + doc_id, // Documento + "Explain error handling", // Tarea para el LLM + None, // Embedding query opcional + 5, // Top 5 chunks relevantes +).await?; + +println!("LLM Response: {}", response.text); +println!("Tokens: {} in, {} out", + response.total_input_tokens, + response.total_output_tokens +); +println!("Cost: ${:.4}", + (response.total_input_tokens as f64 * 5.0 / 1_000_000.0) + + (response.total_output_tokens as f64 * 15.0 / 1_000_000.0) +); +``` + +## Configuración + +### Setup Básico (Development) + +```rust +use std::sync::Arc; +use surrealdb::engine::remote::ws::Ws; +use surrealdb::opt::auth::Root; +use surrealdb::Surreal; +use vapora_llm_router::providers::OllamaClient; +use vapora_rlm::search::bm25::BM25Index; +use vapora_rlm::storage::SurrealDBStorage; +use vapora_rlm::RLMEngine; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // 1. SurrealDB + let db = Surreal::new::("127.0.0.1:8000").await?; + db.signin(Root { + username: "root", + password: "root", + }).await?; + db.use_ns("dev").use_db("rlm").await?; + + // 2. Ollama (local, gratis) + let llm_client = Arc::new(OllamaClient::new( + "http://localhost:11434".to_string(), + "llama3.2".to_string(), + 4096, 0.7, + )?); + + // 3. Storage y BM25 + let storage = Arc::new(SurrealDBStorage::new(db)); + let bm25_index = Arc::new(BM25Index::new()?); + + // 4. RLM Engine + let engine = RLMEngine::with_llm_client( + storage, + bm25_index, + llm_client, + None, // Config por defecto + )?; + + Ok(()) +} +``` + +### Setup Producción (OpenAI) + +```rust +use vapora_llm_router::providers::OpenAIClient; +use vapora_rlm::engine::RLMEngineConfig; +use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy}; +use vapora_rlm::embeddings::EmbeddingConfig; + +// LLM client +let llm_client = Arc::new(OpenAIClient::new( + std::env::var("OPENAI_API_KEY")?, + "gpt-4".to_string(), + 4096, 0.7, + 5.0, // $5 per 1M input tokens + 15.0, // $15 per 1M output tokens +)?); + +// Config optimizada +let config = RLMEngineConfig { + chunking: ChunkingConfig { + strategy: ChunkingStrategy::Semantic, + chunk_size: 1500, + overlap: 300, + }, + embedding: Some(EmbeddingConfig::openai_small()), + auto_rebuild_bm25: true, + max_chunks_per_doc: 10_000, +}; + +// Engine con config +let engine = RLMEngine::with_llm_client( + storage, + bm25_index, + llm_client, + Some(config), +)?; +``` + +## Casos de Uso + +### 1. Análisis de Código (Code Review) + +```rust +// Cargar repositorio +let code = std::fs::read_to_string("src/main.rs")?; +let chunks = engine.load_document("repo/main.rs", &code, None).await?; + +// Buscar errores potenciales +let results = engine.query( + "repo/main.rs", + "unsafe unwrap panic error", // Keywords + None, + 10 +).await?; + +// Analizar con LLM +let review = engine.dispatch_subtask( + "repo/main.rs", + "Review this Rust code for potential bugs, unsafe patterns, \ + and suggest improvements following best practices", + None, + 10 // Top 10 chunks relevantes +).await?; + +println!("Code Review:\n{}", review.text); +``` + +### 2. Documentación Q&A + +```rust +// Cargar documentación +let docs = std::fs::read_to_string("docs/README.md")?; +engine.load_document("docs", &docs, None).await?; + +// Query usuario +let question = "How do I configure authentication?"; +let relevant_chunks = engine.query("docs", question, None, 5).await?; + +// Respuesta del LLM basada en docs +let answer = engine.dispatch_subtask( + "docs", + &format!("Answer this question based on the documentation: {}", question), + None, + 5 +).await?; + +println!("Answer: {}", answer.text); +``` + +### 3. Log Analysis + +```rust +// Cargar logs (grandes volúmenes) +let logs = std::fs::read_to_string("/var/log/app.log")?; +let chunks = engine.load_document( + "logs/app", + &logs, + Some(ChunkingConfig { + strategy: ChunkingStrategy::Fixed, + chunk_size: 500, // Logs más pequeños + overlap: 50, + }) +).await?; + +// Buscar errores +let errors = engine.query( + "logs/app", + "ERROR FATAL exception crash", + None, + 20 +).await?; + +// Análisis de root cause +let analysis = engine.dispatch_subtask( + "logs/app", + "Analyze these error logs and identify the root cause. \ + Suggest fixes and preventive measures.", + None, + 20 +).await?; + +println!("Root Cause Analysis:\n{}", analysis.text); +``` + +### 4. Knowledge Base Building + +```rust +// Cargar múltiples documentos +let docs = vec![ + ("guide1.md", std::fs::read_to_string("docs/guide1.md")?), + ("guide2.md", std::fs::read_to_string("docs/guide2.md")?), + ("api.md", std::fs::read_to_string("docs/api.md")?), +]; + +for (id, content) in docs { + engine.load_document(id, &content, None).await?; +} + +// Query cross-document +let results = engine.query( + "guide1.md", // Busca en este doc primero + "authentication setup", + None, + 5 +).await?; + +// También buscar en otros docs +let all_results = futures::future::join_all( + ["guide1.md", "guide2.md", "api.md"] + .iter() + .map(|doc_id| engine.query(doc_id, "authentication", None, 3)) +).await; +``` + +## Adaptación + +### Tuning de Performance + +#### 1. Chunk Size Optimization + +```rust +// Para documentos técnicos densos +ChunkingConfig { + strategy: ChunkingStrategy::Semantic, + chunk_size: 2000, // Chunks grandes = más contexto + overlap: 400, // Overlap mayor para continuidad +} + +// Para búsqueda precisa +ChunkingConfig { + strategy: ChunkingStrategy::Fixed, + chunk_size: 500, // Chunks pequeños = precisión + overlap: 50, // Overlap menor = más chunks únicos +} +``` + +**Regla general:** +- Chunks grandes (1500-2000): Más contexto, menos chunks, búsqueda más lenta +- Chunks medianos (1000-1500): Balance óptimo +- Chunks pequeños (500-1000): Precisión alta, más chunks, más tokens + +#### 2. Embedding Provider Selection + +```rust +use vapora_rlm::embeddings::EmbeddingConfig; + +// Desarrollo local (gratis) +let config = RLMEngineConfig { + embedding: Some(EmbeddingConfig::ollama("llama3.2")), + ..Default::default() +}; + +// Producción (mejor calidad) +let config = RLMEngineConfig { + embedding: Some(EmbeddingConfig::openai_large()), // 3072 dims + ..Default::default() +}; + +// Balance costo/calidad +let config = RLMEngineConfig { + embedding: Some(EmbeddingConfig::openai_small()), // 1536 dims + ..Default::default() +}; +``` + +#### 3. Query Optimization + +```rust +// Búsqueda amplia (exploratoria) +let results = engine.query(doc_id, query, None, 20).await?; + +// Búsqueda precisa (top results) +let results = engine.query(doc_id, query, None, 3).await?; + +// Con embedding personalizado +let embedding = embedding_generator.embed(query).await?; +let results = engine.query(doc_id, query, Some(&embedding), 5).await?; +``` + +### Custom Chunking Strategy + +```rust +use vapora_rlm::chunking::{Chunker, ChunkResult}; + +// Implementa tu propia estrategia +pub struct CustomChunker { + chunk_size: usize, + // ... custom logic +} + +impl Chunker for CustomChunker { + fn chunk(&self, text: &str) -> Result, ChunkingError> { + // Tu lógica de chunking + todo!() + } +} +``` + +### Custom Aggregation Strategy + +```rust +use vapora_rlm::dispatch::{DispatchConfig, AggregationStrategy}; + +let config = DispatchConfig { + include_content: true, + include_metadata: false, + max_chunks_per_dispatch: 10, + aggregation: AggregationStrategy::MajorityVote, // Para clasificación +}; +``` + +## Troubleshooting + +### Problem: Query returns 0 results + +**Diagnóstico:** +```rust +// 1. Verificar chunks cargados +let stats = bm25_index.stats(); +println!("BM25 docs: {}", stats.num_docs); + +// 2. Verificar storage +let chunks = storage.get_chunks(doc_id).await?; +println!("Storage chunks: {}", chunks.len()); + +// 3. Test BM25 directo +let bm25_results = bm25_index.search(query, 10)?; +println!("BM25 results: {}", bm25_results.len()); +``` + +**Soluciones:** +- Asegurar que `load_document()` completó exitosamente +- Verificar que el query matchea contenido del documento +- Aumentar límite de resultados (`limit`) +- Usar keywords más generales + +### Problem: Slow load performance + +**Diagnóstico:** +```rust +let start = Instant::now(); +let chunks = engine.load_document(doc_id, content, None).await?; +println!("Load time: {:?}", start.elapsed()); +println!("Chunks created: {}", chunks); +println!("Time per chunk: {:?}", start.elapsed() / chunks as u32); +``` + +**Optimizaciones:** +- Deshabilitar embeddings temporalmente: `embedding: None` +- Aumentar chunk size (menos chunks) +- Usar `auto_rebuild_bm25: false` y rebuild manual +- Batch loading para múltiples documentos + +### Problem: High LLM costs + +**Monitoreo:** +```rust +let response = engine.dispatch_subtask(doc_id, task, None, 5).await?; + +let cost = (response.total_input_tokens as f64 * 5.0 / 1_000_000.0) + + (response.total_output_tokens as f64 * 15.0 / 1_000_000.0); + +println!("Cost this call: ${:.4}", cost); +println!("Input tokens: {}", response.total_input_tokens); +println!("Output tokens: {}", response.total_output_tokens); +``` + +**Reducciones:** +- Reducir `limit` en queries (menos chunks al LLM) +- Usar modelos más baratos (gpt-3.5-turbo vs gpt-4) +- Usar Ollama local para desarrollo +- Cache de resultados frecuentes +- Chunk size más grande (menos llamadas) + +### Problem: SurrealDB schema errors + +**Verificación:** +```bash +# Aplicar schema correcto +cd crates/vapora-rlm/tests +bash test_setup.sh +``` + +**Alternativa manual:** +```sql +-- Conectar a SurrealDB +USE NS production DB rlm; + +-- Verificar tablas +INFO FOR DB; + +-- Limpiar si necesario +REMOVE TABLE rlm_chunks; +REMOVE TABLE rlm_buffers; +REMOVE TABLE rlm_executions; + +-- Reaplicar schema +-- (copiar contenido de migrations/008_rlm_schema.surql) +``` + +## Ejemplos Completos + +Ver directorio `examples/`: + +```bash +# Local development con Ollama (gratis) +cargo run --example local_ollama + +# Production con OpenAI +export OPENAI_API_KEY="sk-..." +cargo run --example production_setup +``` + +## Referencias + +- **ADR**: `docs/architecture/decisions/008-recursive-language-models-integration.md` +- **Production Guide**: `crates/vapora-rlm/PRODUCTION.md` +- **API Docs**: `cargo doc --open -p vapora-rlm` +- **Tests**: `crates/vapora-rlm/tests/` + +## Siguiente Paso + +1. ✅ Ejecuta ejemplo local: `cargo run --example local_ollama` +2. ✅ Lee el ADR para decisiones arquitecturales +3. ✅ Revisa tests para ejemplos de uso: `tests/e2e_integration.rs` +4. ✅ Consulta PRODUCTION.md para deployment diff --git a/migrations/008_rlm_schema.surql b/migrations/008_rlm_schema.surql new file mode 100644 index 0000000..6c9c1c8 --- /dev/null +++ b/migrations/008_rlm_schema.surql @@ -0,0 +1,30 @@ +-- Migration 008: RLM Schema +-- Creates tables for Recursive Language Models (RLM) integration +-- Provides chunking, buffering, and execution history storage + +-- Use test namespace and database +USE NS test_rlm_e2e DB test_rlm_e2e; + +-- RLM Chunks table (from documents) +-- Note: Using SCHEMALESS instead of SCHEMAFULL because: +-- 1. SurrealDB auto-generates `id` field for all records +-- 2. Explicit `id` field definition in SCHEMAFULL causes conflicts with CREATE queries +-- 3. SCHEMALESS provides flexibility while still allowing indexes +DEFINE TABLE rlm_chunks SCHEMALESS; + +-- Indexes for efficient queries (work with SCHEMALESS tables) +DEFINE INDEX idx_rlm_chunks_chunk_id ON TABLE rlm_chunks COLUMNS chunk_id UNIQUE; +DEFINE INDEX idx_rlm_chunks_doc_id ON TABLE rlm_chunks COLUMNS doc_id; + +-- RLM Buffers table (pass-by-reference for large contexts) +DEFINE TABLE rlm_buffers SCHEMALESS; + +-- Indexes for buffers +DEFINE INDEX idx_rlm_buffers_buffer_id ON TABLE rlm_buffers COLUMNS buffer_id UNIQUE; + +-- RLM Execution History table (for learning) +DEFINE TABLE rlm_executions SCHEMALESS; + +-- Indexes for execution history +DEFINE INDEX idx_rlm_executions_execution_id ON TABLE rlm_executions COLUMNS execution_id UNIQUE; +DEFINE INDEX idx_rlm_executions_doc_id ON TABLE rlm_executions COLUMNS doc_id;