# RLM Usage Guide - Recursive Language Models

A complete guide to using and adapting the RLM system in VAPORA.

## Table of Contents

- [Introduction](#introduction)
- [Basic Concepts](#basic-concepts)
- [Configuration](#configuration)
- [Use Cases](#use-cases)
- [Adaptation](#adaptation)
- [Troubleshooting](#troubleshooting)

## Introduction

### What is RLM?

RLM (Recursive Language Models) is a distributed reasoning system that provides:

- **Large-document processing** (>100k tokens) without context-window limits
- **Hybrid search** (keywords + semantic) with RRF fusion
- **Distributed reasoning** via parallel LLM calls
- **Incremental learning** from execution history

### When should you use RLM?

**Use RLM when:**

- ✅ Documents exceed 50k tokens
- ✅ You need semantic + keyword search
- ✅ You need reasoning over multiple parts of a document
- ✅ You want to reuse solutions from past executions

**Do not use RLM when:**

- ❌ Documents are small (<10k tokens) - call the LLM directly
- ❌ You only need simple chat
- ❌ Latency is critical (<100ms) - use a cache or direct embeddings

A simple routing heuristic based on document size is sketched below.
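
A minimal sketch of that routing decision, assuming the common ~4-characters-per-token estimate; the threshold and the estimator are illustrative, not part of the RLM API:

```rust
/// Rough token estimate: ~4 characters per token for English text.
fn estimate_tokens(text: &str) -> usize {
    text.len() / 4
}

/// Route to RLM only when the document is large enough to benefit.
fn should_use_rlm(text: &str) -> bool {
    estimate_tokens(text) > 50_000
}
```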

## Basic Concepts

### 1. Chunking

Splits documents into manageable chunks:

```rust
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};

// Fixed: fixed-size chunks with overlap
let config = ChunkingConfig {
    strategy: ChunkingStrategy::Fixed,
    chunk_size: 1000, // 1000 characters per chunk
    overlap: 200,     // 200 characters of overlap
};

// Semantic: respects sentence boundaries
let config = ChunkingConfig {
    strategy: ChunkingStrategy::Semantic,
    chunk_size: 1000,
    overlap: 200,
};

// Code: uses the AST for source code
let config = ChunkingConfig {
    strategy: ChunkingStrategy::Code,
    chunk_size: 1500,
    overlap: 300,
};
```

**Selection guide:**

| Content type | Recommended strategy | Chunk size |
|--------------|----------------------|------------|
| Prose, docs  | Semantic             | 1000-2000  |
| Source code  | Code                 | 1500-3000  |
| Logs, data   | Fixed                | 500-1000   |
| Mixed        | Semantic             | 1000-1500  |
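
If you select the strategy programmatically, a small helper keeps this table in code. A sketch using the config types shown above; the `ContentKind` enum and the exact numbers are illustrative:

```rust
enum ContentKind { Prose, SourceCode, Logs, Mixed }

/// Map a content type to a ChunkingConfig, following the selection guide.
fn config_for(kind: ContentKind) -> ChunkingConfig {
    match kind {
        ContentKind::Prose => ChunkingConfig {
            strategy: ChunkingStrategy::Semantic, chunk_size: 1500, overlap: 300,
        },
        ContentKind::SourceCode => ChunkingConfig {
            strategy: ChunkingStrategy::Code, chunk_size: 2000, overlap: 400,
        },
        ContentKind::Logs => ChunkingConfig {
            strategy: ChunkingStrategy::Fixed, chunk_size: 750, overlap: 75,
        },
        ContentKind::Mixed => ChunkingConfig {
            strategy: ChunkingStrategy::Semantic, chunk_size: 1200, overlap: 240,
        },
    }
}
```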

### 2. Hybrid Search

Combines BM25 (keywords) + embeddings (semantics) + RRF:

```rust
// Automatic - RLM handles the hybrid search
let results = engine.query(
    doc_id,            // Document ID
    "error handling",  // Query (keywords)
    None,              // Optional embedding
    5,                 // Top 5 results
).await?;

// Results include hybrid scores
for result in results {
    println!("Score: {}", result.score); // Combined RRF score
    println!("BM25: {:?}", result.bm25_score);
    println!("Semantic: {:?}", result.semantic_score);
    println!("Content: {}", result.chunk.content);
}
```

**Scores:**

- `score`: final RRF score (higher = better)
- `bm25_score`: keyword score (TF-IDF based)
- `semantic_score`: semantic score (cosine similarity)
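
To make the fused `score` less opaque: Reciprocal Rank Fusion combines the two rankings by rank position, not by raw score. A self-contained sketch; `k = 60` is the constant commonly used in the RRF literature, and RLM's internal constant may differ:

```rust
/// RRF: each ranking contributes 1 / (k + rank) for every item it returns.
fn rrf_score(ranks: &[usize], k: f64) -> f64 {
    ranks.iter().map(|&rank| 1.0 / (k + rank as f64)).sum()
}

// A chunk ranked 1st by BM25 and 3rd by semantic search:
// 1/(60+1) + 1/(60+3) ≈ 0.0164 + 0.0159 ≈ 0.0323
let score = rrf_score(&[1, 3], 60.0);
```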

### 3. LLM Dispatch (Distributed Reasoning)

Sends the relevant chunks to the LLM for analysis:

```rust
let response = engine.dispatch_subtask(
    doc_id,                    // Document
    "Explain error handling",  // Task for the LLM
    None,                      // Optional query embedding
    5,                         // Top 5 relevant chunks
).await?;

println!("LLM Response: {}", response.text);
println!("Tokens: {} in, {} out",
    response.total_input_tokens,
    response.total_output_tokens
);
println!("Cost: ${:.4}",
    (response.total_input_tokens as f64 * 5.0 / 1_000_000.0) +
    (response.total_output_tokens as f64 * 15.0 / 1_000_000.0)
);
```
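
The cost arithmetic above hardcodes GPT-4-style rates ($5/$15 per 1M tokens). If you compute this in more than one place, a small helper avoids drift; the function is illustrative, not part of the RLM API:

```rust
/// Estimate call cost in USD from token counts and per-1M-token rates.
fn estimate_cost(input_tokens: f64, output_tokens: f64, in_rate: f64, out_rate: f64) -> f64 {
    input_tokens * in_rate / 1_000_000.0 + output_tokens * out_rate / 1_000_000.0
}

let cost = estimate_cost(
    response.total_input_tokens as f64,
    response.total_output_tokens as f64,
    5.0,  // $ per 1M input tokens
    15.0, // $ per 1M output tokens
);
println!("Cost: ${:.4}", cost);
```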

## Configuration

### Basic Setup (Development)

```rust
use std::sync::Arc;
use surrealdb::engine::remote::ws::Ws;
use surrealdb::opt::auth::Root;
use surrealdb::Surreal;
use vapora_llm_router::providers::OllamaClient;
use vapora_rlm::search::bm25::BM25Index;
use vapora_rlm::storage::SurrealDBStorage;
use vapora_rlm::RLMEngine;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // 1. SurrealDB
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
    db.signin(Root {
        username: "root",
        password: "root",
    }).await?;
    db.use_ns("dev").use_db("rlm").await?;

    // 2. Ollama (local, free)
    let llm_client = Arc::new(OllamaClient::new(
        "http://localhost:11434".to_string(),
        "llama3.2".to_string(),
        4096, 0.7,
    )?);

    // 3. Storage and BM25
    let storage = Arc::new(SurrealDBStorage::new(db));
    let bm25_index = Arc::new(BM25Index::new()?);

    // 4. RLM Engine
    let engine = RLMEngine::with_llm_client(
        storage,
        bm25_index,
        llm_client,
        None, // Default config
    )?;

    Ok(())
}
```
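
With the engine constructed, a minimal end-to-end roundtrip (using the `load_document` and `query` calls shown throughout this guide) looks like:

```rust
// Ingest a document, then run a hybrid query over it.
let content = std::fs::read_to_string("README.md")?;
engine.load_document("readme", &content, None).await?;

let results = engine.query("readme", "installation steps", None, 5).await?;
for r in results {
    println!("{:.4}  {}", r.score, r.chunk.content);
}
```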

### Production Setup (OpenAI)

```rust
use vapora_llm_router::providers::OpenAIClient;
use vapora_rlm::engine::RLMEngineConfig;
use vapora_rlm::chunking::{ChunkingConfig, ChunkingStrategy};
use vapora_rlm::embeddings::EmbeddingConfig;

// LLM client
let llm_client = Arc::new(OpenAIClient::new(
    std::env::var("OPENAI_API_KEY")?,
    "gpt-4".to_string(),
    4096, 0.7,
    5.0,  // $5 per 1M input tokens
    15.0, // $15 per 1M output tokens
)?);

// Tuned config
let config = RLMEngineConfig {
    chunking: ChunkingConfig {
        strategy: ChunkingStrategy::Semantic,
        chunk_size: 1500,
        overlap: 300,
    },
    embedding: Some(EmbeddingConfig::openai_small()),
    auto_rebuild_bm25: true,
    max_chunks_per_doc: 10_000,
};

// Engine with config
let engine = RLMEngine::with_llm_client(
    storage,
    bm25_index,
    llm_client,
    Some(config),
)?;
```

## Use Cases

### 1. Code Analysis (Code Review)

```rust
// Load a source file
let code = std::fs::read_to_string("src/main.rs")?;
let chunks = engine.load_document("repo/main.rs", &code, None).await?;

// Search for potential problems
let results = engine.query(
    "repo/main.rs",
    "unsafe unwrap panic error", // Keywords
    None,
    10
).await?;

// Analyze with the LLM
let review = engine.dispatch_subtask(
    "repo/main.rs",
    "Review this Rust code for potential bugs, unsafe patterns, \
     and suggest improvements following best practices",
    None,
    10 // Top 10 relevant chunks
).await?;

println!("Code Review:\n{}", review.text);
```
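
To review more than one file, the same pattern extends to a directory walk. A sketch using the `walkdir` crate; this is an assumption, `walkdir` is not a stated dependency of this project, so add it to `Cargo.toml` if you use this:

```rust
use walkdir::WalkDir;

// Load every .rs file under src/ as its own document.
for entry in WalkDir::new("src").into_iter().filter_map(|e| e.ok()) {
    let path = entry.path();
    if path.extension().and_then(|e| e.to_str()) == Some("rs") {
        let code = std::fs::read_to_string(path)?;
        let doc_id = format!("repo/{}", path.display());
        engine.load_document(&doc_id, &code, None).await?;
    }
}
```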

### 2. Documentation Q&A

```rust
// Load documentation
let docs = std::fs::read_to_string("docs/README.md")?;
engine.load_document("docs", &docs, None).await?;

// User query
let question = "How do I configure authentication?";
let relevant_chunks = engine.query("docs", question, None, 5).await?;

// LLM answer grounded in the docs
let answer = engine.dispatch_subtask(
    "docs",
    &format!("Answer this question based on the documentation: {}", question),
    None,
    5
).await?;

println!("Answer: {}", answer.text);
```

### 3. Log Analysis

```rust
// Load logs (large volumes)
let logs = std::fs::read_to_string("/var/log/app.log")?;
let chunks = engine.load_document(
    "logs/app",
    &logs,
    Some(ChunkingConfig {
        strategy: ChunkingStrategy::Fixed,
        chunk_size: 500, // Smaller chunks for log lines
        overlap: 50,
    })
).await?;

// Search for errors
let errors = engine.query(
    "logs/app",
    "ERROR FATAL exception crash",
    None,
    20
).await?;

// Root-cause analysis
let analysis = engine.dispatch_subtask(
    "logs/app",
    "Analyze these error logs and identify the root cause. \
     Suggest fixes and preventive measures.",
    None,
    20
).await?;

println!("Root Cause Analysis:\n{}", analysis.text);
```
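
Very large or rotating logs may not fit comfortably in a single ingest. One option is to split the load into segments, each with its own document ID; the segmentation scheme below is illustrative:

```rust
// Load a huge log in ~5 MB segments, one document per segment.
// Note: byte-based splitting may cut a UTF-8 character; the lossy
// conversion below papers over that for illustration purposes.
let logs = std::fs::read_to_string("/var/log/app.log")?;
const SEGMENT: usize = 5_000_000;

for (i, segment) in logs.as_bytes().chunks(SEGMENT).enumerate() {
    let text = String::from_utf8_lossy(segment);
    engine.load_document(&format!("logs/app/part-{}", i), &text, None).await?;
}
```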

### 4. Knowledge Base Building

```rust
// Load multiple documents
let docs = vec![
    ("guide1.md", std::fs::read_to_string("docs/guide1.md")?),
    ("guide2.md", std::fs::read_to_string("docs/guide2.md")?),
    ("api.md", std::fs::read_to_string("docs/api.md")?),
];

for (id, content) in docs {
    engine.load_document(id, &content, None).await?;
}

// Query a single document
let results = engine.query(
    "guide1.md", // Search this document first
    "authentication setup",
    None,
    5
).await?;

// Then query the other documents as well
let all_results = futures::future::join_all(
    ["guide1.md", "guide2.md", "api.md"]
        .iter()
        .map(|doc_id| engine.query(doc_id, "authentication", None, 3))
).await;
```
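
`join_all` returns one result list per document; to present a single ranked list you still have to merge them. A sketch, with the caveat that RRF scores are computed per document index, so cross-document comparison is approximate:

```rust
// Flatten per-document results and sort by RRF score, best first.
let mut merged: Vec<_> = all_results
    .into_iter()
    .filter_map(|r| r.ok()) // Drop documents whose query failed
    .flatten()
    .collect();

merged.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());

for result in merged.iter().take(5) {
    println!("{:.4}  {}", result.score, result.chunk.content);
}
```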

## Adaptation

### Performance Tuning

#### 1. Chunk Size Optimization

```rust
// For dense technical documents
ChunkingConfig {
    strategy: ChunkingStrategy::Semantic,
    chunk_size: 2000, // Large chunks = more context
    overlap: 400,     // Larger overlap for continuity
}

// For precise search
ChunkingConfig {
    strategy: ChunkingStrategy::Fixed,
    chunk_size: 500, // Small chunks = precision
    overlap: 50,     // Smaller overlap = more distinct chunks
}
```

**Rule of thumb:**

- Large chunks (1500-2000): more context, fewer chunks, slower search
- Medium chunks (1000-1500): the usual sweet spot
- Small chunks (500-1000): high precision, more chunks, more tokens

The resulting chunk count follows directly from these parameters, as the sketch below shows.
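
With overlap, consecutive chunks advance by `chunk_size - overlap` characters, so a quick back-of-the-envelope estimate is:

```rust
/// Approximate chunk count for a document of `len` characters.
fn estimated_chunks(len: usize, chunk_size: usize, overlap: usize) -> usize {
    let step = chunk_size - overlap; // characters of new text per chunk
    len.div_ceil(step)
}

// A 100_000-character doc at chunk_size=1000, overlap=200 -> ~125 chunks.
let n = estimated_chunks(100_000, 1_000, 200);
```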

#### 2. Embedding Provider Selection

```rust
use vapora_rlm::embeddings::EmbeddingConfig;

// Local development (free)
let config = RLMEngineConfig {
    embedding: Some(EmbeddingConfig::ollama("llama3.2")),
    ..Default::default()
};

// Production (best quality)
let config = RLMEngineConfig {
    embedding: Some(EmbeddingConfig::openai_large()), // 3072 dims
    ..Default::default()
};

// Cost/quality balance
let config = RLMEngineConfig {
    embedding: Some(EmbeddingConfig::openai_small()), // 1536 dims
    ..Default::default()
};
```

#### 3. Query Optimization

```rust
// Broad search (exploratory)
let results = engine.query(doc_id, query, None, 20).await?;

// Narrow search (top results)
let results = engine.query(doc_id, query, None, 3).await?;

// With a custom embedding
let embedding = embedding_generator.embed(query).await?;
let results = engine.query(doc_id, query, Some(&embedding), 5).await?;
```
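
Embedding the same query text repeatedly wastes latency and API calls. A minimal in-memory cache around the embed call; this is a sketch, where `EmbeddingGenerator` is a hypothetical stand-in for whatever generator type you use, and the `Vec<f32>` embedding type is an assumption based on the `Some(&embedding)` usage above:

```rust
use std::collections::HashMap;

/// Caches query embeddings so repeated queries skip the embed call.
struct EmbeddingCache {
    cache: HashMap<String, Vec<f32>>,
}

impl EmbeddingCache {
    async fn get_or_embed(
        &mut self,
        generator: &EmbeddingGenerator, // hypothetical generator type
        query: &str,
    ) -> anyhow::Result<Vec<f32>> {
        if let Some(hit) = self.cache.get(query) {
            return Ok(hit.clone()); // Cache hit: no API call
        }
        let embedding = generator.embed(query).await?;
        self.cache.insert(query.to_string(), embedding.clone());
        Ok(embedding)
    }
}
```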

### Custom Chunking Strategy

```rust
use vapora_rlm::chunking::{Chunker, ChunkResult, ChunkingError};

// Implement your own strategy
pub struct CustomChunker {
    chunk_size: usize,
    // ... custom state
}

impl Chunker for CustomChunker {
    fn chunk(&self, text: &str) -> Result<Vec<ChunkResult>, ChunkingError> {
        // Your chunking logic, e.g. split on domain-specific delimiters
        todo!()
    }
}
```

### Custom Aggregation Strategy

```rust
use vapora_rlm::dispatch::{DispatchConfig, AggregationStrategy};

let config = DispatchConfig {
    include_content: true,
    include_metadata: false,
    max_chunks_per_dispatch: 10,
    aggregation: AggregationStrategy::MajorityVote, // For classification tasks
};
```

## Troubleshooting

### Problem: Query returns 0 results

**Diagnosis:**

```rust
// 1. Check loaded chunks
let stats = bm25_index.stats();
println!("BM25 docs: {}", stats.num_docs);

// 2. Check storage
let chunks = storage.get_chunks(doc_id).await?;
println!("Storage chunks: {}", chunks.len());

// 3. Test BM25 directly
let bm25_results = bm25_index.search(query, 10)?;
println!("BM25 results: {}", bm25_results.len());
```

**Fixes:**

- Make sure `load_document()` completed successfully
- Check that the query actually matches content in the document
- Raise the result limit (`limit`)
- Use more general keywords

### Problem: Slow load performance

**Diagnosis:**

```rust
use std::time::Instant;

let start = Instant::now();
let chunks = engine.load_document(doc_id, content, None).await?;
println!("Load time: {:?}", start.elapsed());
println!("Chunks created: {}", chunks);
println!("Time per chunk: {:?}", start.elapsed() / chunks as u32);
```

**Optimizations:**

- Temporarily disable embeddings: `embedding: None`
- Increase the chunk size (fewer chunks)
- Set `auto_rebuild_bm25: false` and rebuild manually
- Batch-load multiple documents

### Problem: High LLM costs

**Monitoring:**

```rust
let response = engine.dispatch_subtask(doc_id, task, None, 5).await?;

let cost = (response.total_input_tokens as f64 * 5.0 / 1_000_000.0)
    + (response.total_output_tokens as f64 * 15.0 / 1_000_000.0);

println!("Cost this call: ${:.4}", cost);
println!("Input tokens: {}", response.total_input_tokens);
println!("Output tokens: {}", response.total_output_tokens);
```
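
If you want to stop runaway spending rather than just observe it, a simple per-session budget guard can wrap the same arithmetic; an illustrative sketch, with rates matching the GPT-4 figures used above:

```rust
/// Accumulates spend and refuses further dispatches past a budget.
struct BudgetGuard {
    spent_usd: f64,
    budget_usd: f64,
}

impl BudgetGuard {
    fn record(&mut self, input_tokens: f64, output_tokens: f64) -> anyhow::Result<()> {
        self.spent_usd += input_tokens * 5.0 / 1_000_000.0
            + output_tokens * 15.0 / 1_000_000.0;
        if self.spent_usd > self.budget_usd {
            anyhow::bail!("LLM budget exceeded: ${:.2}", self.spent_usd);
        }
        Ok(())
    }
}
```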

**Cost reductions:**

- Lower the `limit` on queries (fewer chunks sent to the LLM)
- Use cheaper models (gpt-3.5-turbo vs gpt-4)
- Use local Ollama for development
- Cache frequent results
- Use a larger chunk size (fewer calls)

### Problem: SurrealDB schema errors

**Verification:**

```bash
# Apply the correct schema
cd crates/vapora-rlm/tests
bash test_setup.sh
```

**Manual alternative:**

```sql
-- Connect to SurrealDB
USE NS production DB rlm;

-- Inspect tables
INFO FOR DB;

-- Clean up if necessary
REMOVE TABLE rlm_chunks;
REMOVE TABLE rlm_buffers;
REMOVE TABLE rlm_executions;

-- Reapply the schema
-- (copy the contents of migrations/008_rlm_schema.surql)
```

## Complete Examples

See the `examples/` directory:

```bash
# Local development with Ollama (free)
cargo run --example local_ollama

# Production with OpenAI
export OPENAI_API_KEY="sk-..."
cargo run --example production_setup
```

## References

- **ADR**: `docs/adrs/0029-rlm-recursive-language-models.md`
- **Production Guide**: `crates/vapora-rlm/PRODUCTION.md`
- **API Docs**: `cargo doc --open -p vapora-rlm`
- **Tests**: `crates/vapora-rlm/tests/`

## Next Steps

1. ✅ Run the local example: `cargo run --example local_ollama`
2. ✅ Read the ADR for the architectural decisions
3. ✅ Review the tests for usage examples: `tests/e2e_integration.rs`
4. ✅ Consult PRODUCTION.md for deployment