Some checks are pending
Documentation Lint & Validation / Markdown Linting (push) Waiting to run
Documentation Lint & Validation / Validate mdBook Configuration (push) Waiting to run
Documentation Lint & Validation / Content & Structure Validation (push) Waiting to run
Documentation Lint & Validation / Lint & Validation Summary (push) Blocked by required conditions
mdBook Build & Deploy / Build mdBook (push) Waiting to run
mdBook Build & Deploy / Documentation Quality Check (push) Blocked by required conditions
mdBook Build & Deploy / Deploy to GitHub Pages (push) Blocked by required conditions
mdBook Build & Deploy / Notification (push) Blocked by required conditions
Rust CI / Security Audit (push) Waiting to run
Rust CI / Check + Test + Lint (nightly) (push) Waiting to run
Rust CI / Check + Test + Lint (stable) (push) Waiting to run
165 lines
5.4 KiB
Rust
165 lines
5.4 KiB
Rust
//! # Similarity Search Example
//!
//! Demonstrates semantic similarity search in the knowledge graph.
//!
//! ## What This Example Shows
//! - Recording execution records in the knowledge graph (KG)
//! - Querying similar past tasks
//! - Using similarity scores for recommendations
//! - Pattern matching for problem solving
//!
//! ## Run
//! ```bash
//! cargo run --example 03-similarity-search -p vapora-knowledge-graph
//! ```

/// Computes the Jaccard similarity between two keyword lists.
///
/// Both lists are treated as sets, so a keyword that is repeated within one
/// list is counted once. Matching is exact and case-sensitive.
///
/// Returns `(shared, score)`, where `shared` is the number of distinct
/// keywords present in both lists and `score` is `shared / |union|`, a value
/// in `0.0..=1.0`. Two empty lists yield `(0, 0.0)` instead of dividing by
/// zero.
fn jaccard_similarity(query: &[&str], candidate: &[&str]) -> (usize, f64) {
    let query: std::collections::HashSet<&str> = query.iter().copied().collect();
    let candidate: std::collections::HashSet<&str> = candidate.iter().copied().collect();

    let shared = query.intersection(&candidate).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; cannot underflow because `shared` is at
    // most the size of the smaller set.
    let union = query.len() + candidate.len() - shared;

    let score = if union == 0 {
        0.0
    } else {
        shared as f64 / union as f64
    };
    (shared, score)
}

/// Runs the similarity-search walkthrough and prints every step to stdout.
///
/// The example builds a fixed list of past execution records, scores each one
/// against the keywords of a new task using [`jaccard_similarity`], prints the
/// ranked table, then prints the best match and the three highest-ranked
/// solutions.
fn main() {
    // Width, in characters, of the similarity bar drawn in the results table.
    const BAR_WIDTH: usize = 20;
    // Keyword set shared by the two authentication/token related records.
    const JWT_KEYWORDS: &[&str] = &["JWT", "authentication", "tokens"];

    println!("=== Knowledge Graph Similarity Search ===\n");

    // Step 1: Simulate execution records.
    // Keywords live on the record itself, so a newly added record cannot be
    // silently scored against an empty keyword list.
    #[derive(Clone, Debug)]
    struct Record {
        id: String,
        description: String,
        task_type: String,
        solution: String,
        keywords: &'static [&'static str],
    }

    let past_executions = [
        Record {
            id: "exec-1".to_string(),
            description: "Implement user authentication with JWT".to_string(),
            task_type: "coding".to_string(),
            solution: "Used OAuth2 + JWT with 30min expiry".to_string(),
            keywords: JWT_KEYWORDS,
        },
        Record {
            id: "exec-2".to_string(),
            description: "Fix session timeout issues".to_string(),
            task_type: "debugging".to_string(),
            solution: "Extended cache TTL to 60min".to_string(),
            keywords: &["session", "timeout", "cache"],
        },
        Record {
            id: "exec-3".to_string(),
            description: "Optimize database query performance".to_string(),
            task_type: "optimization".to_string(),
            solution: "Added indexes on foreign keys".to_string(),
            keywords: &["database", "performance", "optimization"],
        },
        Record {
            id: "exec-4".to_string(),
            description: "Implement token refresh mechanism".to_string(),
            task_type: "coding".to_string(),
            solution: "Sliding window with refresh tokens".to_string(),
            keywords: JWT_KEYWORDS,
        },
        Record {
            id: "exec-5".to_string(),
            description: "Add API rate limiting".to_string(),
            task_type: "security".to_string(),
            solution: "Token bucket algorithm, 100 req/min".to_string(),
            keywords: &["API", "rate limit", "security"],
        },
    ];

    println!(
        "Knowledge Graph contains {} historical executions\n",
        past_executions.len()
    );

    // Step 2: New task to find similar solutions for.
    let new_task = "Implement API key authentication for third-party services";
    println!("New task: {}\n", new_task);

    // Step 3: Similarity computation (keyword-based Jaccard matching).
    println!("=== Searching for Similar Past Solutions ===\n");

    let keywords_new = ["authentication", "API", "third-party"];

    // Borrows the record instead of cloning it; the records outlive the
    // results for the whole of `main`.
    struct SimilarityResult<'a> {
        record: &'a Record,
        similarity_score: f64,
        matching_keywords: usize,
    }

    let mut results: Vec<_> = past_executions
        .iter()
        .map(|record| {
            let (matching_keywords, similarity_score) =
                jaccard_similarity(&keywords_new, record.keywords);
            SimilarityResult {
                record,
                similarity_score,
                matching_keywords,
            }
        })
        .collect();

    // Highest score first. The sort is stable, so records with equal scores
    // keep their original (chronological) order.
    results.sort_by(|a, b| b.similarity_score.total_cmp(&a.similarity_score));

    // Display results.
    println!("Rank | Similarity | Description");
    println!("-----|------------|-------");

    for (rank, result) in results.iter().enumerate() {
        // Clamp so the subtraction below can never underflow, even if a score
        // above 1.0 were ever fed in.
        let filled = ((result.similarity_score * BAR_WIDTH as f64) as usize).min(BAR_WIDTH);
        let bar = "█".repeat(filled) + &"░".repeat(BAR_WIDTH - filled);

        // `{:.0}` rounds the percentage the same way the recommendation and
        // related-solutions sections do, so one score always prints as one
        // number.
        println!(
            "{:2}. | {} | {:.0}%",
            rank + 1,
            bar,
            result.similarity_score * 100.0
        );
        println!(
            " | Matches: {} | {}",
            result.matching_keywords, result.record.description
        );
    }

    // Step 4: Recommendations.
    println!("\n=== Top Recommendation ===\n");
    if let Some(best) = results.first() {
        println!("Record: {}", best.record.id);
        println!("Similarity: {:.0}%", best.similarity_score * 100.0);
        println!("Task type: {}", best.record.task_type);
        println!("Previous solution: {}", best.record.solution);
        println!("\nRecommendation:");
        println!(" Consider similar approach for new task");
        println!(" → Adjust solution for third-party use case");
        println!(" → May need API key rotation strategy");
    }

    // Step 5: Learning opportunities.
    println!("\n=== Related Solutions ===");
    for (rank, result) in results.iter().take(3).enumerate() {
        println!(
            "\n{}. {} ({:.0}% similarity)",
            rank + 1,
            result.record.description,
            result.similarity_score * 100.0
        );
        println!(" Solution: {}", result.record.solution);
    }
}