Some checks are pending
Documentation Lint & Validation / Markdown Linting (push) Waiting to run
Documentation Lint & Validation / Validate mdBook Configuration (push) Waiting to run
Documentation Lint & Validation / Content & Structure Validation (push) Waiting to run
Documentation Lint & Validation / Lint & Validation Summary (push) Blocked by required conditions
mdBook Build & Deploy / Build mdBook (push) Waiting to run
mdBook Build & Deploy / Documentation Quality Check (push) Blocked by required conditions
mdBook Build & Deploy / Deploy to GitHub Pages (push) Blocked by required conditions
mdBook Build & Deploy / Notification (push) Blocked by required conditions
Rust CI / Security Audit (push) Waiting to run
Rust CI / Check + Test + Lint (nightly) (push) Waiting to run
Rust CI / Check + Test + Lint (stable) (push) Waiting to run
319 lines
10 KiB
Rust
319 lines
10 KiB
Rust
use std::collections::HashMap;
|
|
|
|
#[cfg(test)]
|
|
use chrono::Duration;
|
|
use chrono::{DateTime, Utc};
|
|
|
|
/// Per-task-type expertise tracking for agents with recency bias.
/// Recent performance (last 7 days) weighted 3x higher than historical
/// averages.
#[derive(Debug, Clone)]
pub struct LearningProfile {
    /// Identifier of the agent this profile describes.
    pub agent_id: String,
    /// Expertise metrics keyed by task-type name.
    pub task_type_expertise: HashMap<String, TaskTypeExpertise>,
    /// Refreshed whenever an expertise entry is inserted or replaced
    /// via `set_task_type_expertise`.
    pub last_updated: DateTime<Utc>,
}
|
|
|
|
/// Task-specific expertise metrics with learning curves.
#[derive(Debug, Clone)]
pub struct TaskTypeExpertise {
    /// Overall success rate (0.0-1.0) including all historical data.
    /// Defaults to the neutral prior 0.5 when built from zero executions.
    pub success_rate: f64,
    /// Total number of executions for this task type.
    pub total_executions: u32,
    /// Success rate for last 7 days with recency bias applied.
    /// Recent performance weighted 3x higher than older data.
    pub recent_success_rate: f64,
    /// Average duration in milliseconds.
    pub avg_duration_ms: f64,
    /// Time-series of expertise evolution as (timestamp, success_rate).
    /// Computed by aggregating executions into daily/weekly windows
    /// (the current implementation buckets by UTC day only).
    pub learning_curve: Vec<(DateTime<Utc>, f64)>,
    /// Confidence score (0.0-1.0) based on execution count.
    /// Prevents overfitting: min(1.0, total_executions / 20).
    pub confidence: f64,
}
|
|
|
|
impl LearningProfile {
|
|
/// Create new empty learning profile for agent.
|
|
pub fn new(agent_id: String) -> Self {
|
|
Self {
|
|
agent_id,
|
|
task_type_expertise: HashMap::new(),
|
|
last_updated: Utc::now(),
|
|
}
|
|
}
|
|
|
|
/// Add or update expertise for a task type.
|
|
pub fn set_task_type_expertise(&mut self, task_type: String, expertise: TaskTypeExpertise) {
|
|
self.task_type_expertise.insert(task_type, expertise);
|
|
self.last_updated = Utc::now();
|
|
}
|
|
|
|
/// Get expertise score for specific task type, default to 0.5 if unknown.
|
|
pub fn get_task_type_score(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| e.success_rate)
|
|
.unwrap_or(0.5)
|
|
}
|
|
|
|
/// Get recent success rate for task type (weighted with recency bias).
|
|
/// Returns recent_success_rate if available, falls back to overall
|
|
/// success_rate.
|
|
pub fn get_recent_score(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| {
|
|
if e.total_executions >= 5 {
|
|
e.recent_success_rate
|
|
} else {
|
|
e.success_rate
|
|
}
|
|
})
|
|
.unwrap_or(0.5)
|
|
}
|
|
|
|
/// Get confidence score for task type (0.0-1.0 based on execution count).
|
|
pub fn get_confidence(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| e.confidence)
|
|
.unwrap_or(0.0)
|
|
}
|
|
}
|
|
|
|
impl TaskTypeExpertise {
|
|
/// Create expertise metrics from execution data.
|
|
/// Calculates success_rate, confidence, and applies recency bias.
|
|
pub fn from_executions(executions: Vec<ExecutionData>, _task_type: &str) -> Self {
|
|
if executions.is_empty() {
|
|
return Self {
|
|
success_rate: 0.5,
|
|
total_executions: 0,
|
|
recent_success_rate: 0.5,
|
|
avg_duration_ms: 0.0,
|
|
learning_curve: Vec::new(),
|
|
confidence: 0.0,
|
|
};
|
|
}
|
|
|
|
let total_executions = executions.len() as u32;
|
|
let success_count = executions.iter().filter(|e| e.success).count() as u32;
|
|
let success_rate = success_count as f64 / total_executions as f64;
|
|
|
|
let total_duration: u64 = executions.iter().map(|e| e.duration_ms).sum();
|
|
let avg_duration_ms = total_duration as f64 / total_executions as f64;
|
|
|
|
let recent_success_rate = calculate_recency_weighted_success(&executions);
|
|
let confidence = (total_executions as f64 / 20.0).min(1.0);
|
|
|
|
let learning_curve = calculate_learning_curve(&executions);
|
|
|
|
Self {
|
|
success_rate,
|
|
total_executions,
|
|
recent_success_rate,
|
|
avg_duration_ms,
|
|
learning_curve,
|
|
confidence,
|
|
}
|
|
}
|
|
|
|
/// Update expertise with new execution result.
|
|
pub fn update_with_execution(&mut self, execution: &ExecutionData) {
|
|
let new_count = self.total_executions + 1;
|
|
let new_success_count = (self.success_rate * self.total_executions as f64).round() as u32
|
|
+ if execution.success { 1 } else { 0 };
|
|
self.success_rate = new_success_count as f64 / new_count as f64;
|
|
self.total_executions = new_count;
|
|
self.confidence = (new_count as f64 / 20.0).min(1.0);
|
|
|
|
let total_duration = self.avg_duration_ms * self.total_executions as f64
|
|
- self.avg_duration_ms
|
|
+ execution.duration_ms as f64;
|
|
self.avg_duration_ms = total_duration / new_count as f64;
|
|
}
|
|
}
|
|
|
|
/// Execution data for calculating expertise metrics.
#[derive(Debug, Clone)]
pub struct ExecutionData {
    /// Timestamp of the execution (UTC); drives recency weighting and
    /// daily bucketing of the learning curve.
    pub timestamp: DateTime<Utc>,
    /// Duration of the execution in milliseconds.
    pub duration_ms: u64,
    /// Whether the execution succeeded.
    pub success: bool,
}
|
|
|
|
/// Calculate success rate with recency bias.
|
|
/// Last 7 days weighted 3x higher: weight = 3.0 * e^(-days_ago / 7.0).
|
|
fn calculate_recency_weighted_success(executions: &[ExecutionData]) -> f64 {
|
|
if executions.is_empty() {
|
|
return 0.5;
|
|
}
|
|
|
|
let now = Utc::now();
|
|
let mut weighted_success = 0.0;
|
|
let mut total_weight = 0.0;
|
|
|
|
for execution in executions {
|
|
let days_ago = (now - execution.timestamp).num_days() as f64;
|
|
let weight = if days_ago < 7.0 {
|
|
3.0 * (-days_ago / 7.0).exp()
|
|
} else {
|
|
(-days_ago / 7.0).exp()
|
|
};
|
|
|
|
weighted_success += weight * if execution.success { 1.0 } else { 0.0 };
|
|
total_weight += weight;
|
|
}
|
|
|
|
if total_weight > 0.0 {
|
|
weighted_success / total_weight
|
|
} else {
|
|
0.5
|
|
}
|
|
}
|
|
|
|
/// Calculate learning curve as time-series of expertise evolution.
|
|
/// Groups executions into daily windows and computes success rate per window.
|
|
fn calculate_learning_curve(executions: &[ExecutionData]) -> Vec<(DateTime<Utc>, f64)> {
|
|
if executions.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
|
|
let mut by_day: HashMap<DateTime<Utc>, (u32, u32)> = HashMap::new();
|
|
|
|
for execution in executions {
|
|
let day_start = execution
|
|
.timestamp
|
|
.date_naive()
|
|
.and_hms_opt(0, 0, 0)
|
|
.map(|dt| dt.and_utc())
|
|
.unwrap_or_else(|| execution.timestamp);
|
|
|
|
let (total, success) = by_day.entry(day_start).or_insert((0, 0));
|
|
*total += 1;
|
|
if execution.success {
|
|
*success += 1;
|
|
}
|
|
}
|
|
|
|
let mut curve: Vec<_> = by_day
|
|
.iter()
|
|
.map(|(day, (total, success))| (*day, *success as f64 / *total as f64))
|
|
.collect();
|
|
|
|
curve.sort_by_key(|entry| entry.0);
|
|
curve
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh profile carries the agent id and an empty expertise map.
    #[test]
    fn test_learning_profile_creation() {
        let profile = LearningProfile::new("agent-1".to_string());
        assert_eq!(profile.agent_id, "agent-1");
        assert_eq!(profile.task_type_expertise.len(), 0);
    }

    // Aggregates from a 3-execution batch: count, success rate (2/3),
    // mean duration ((100+150+120)/3), and confidence (3/20).
    #[test]
    fn test_task_type_expertise_from_executions() {
        let executions = vec![
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(1),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(2),
                duration_ms: 150,
                success: true,
            },
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(3),
                duration_ms: 120,
                success: false,
            },
        ];

        let expertise = TaskTypeExpertise::from_executions(executions, "coding");
        assert_eq!(expertise.total_executions, 3);
        assert!((expertise.success_rate - 2.0 / 3.0).abs() < 0.01);
        assert!((expertise.avg_duration_ms - 123.33).abs() < 1.0);
        assert!((expertise.confidence - 0.15).abs() < 0.01); // 3/20 = 0.15
    }

    // A recent success vs. an 8-day-old failure: the 3x recency weight
    // on the recent execution must pull the weighted rate above 0.5.
    #[test]
    fn test_recency_bias_weights_recent_higher() {
        let now = Utc::now();
        let executions = vec![
            ExecutionData {
                timestamp: now - Duration::hours(1),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::days(8),
                duration_ms: 100,
                success: false,
            },
        ];

        let recent = calculate_recency_weighted_success(&executions);
        assert!(recent > 0.5); // Recent success pulls average up
    }

    // 100 executions is well past the 20-execution saturation point,
    // so confidence must clamp at exactly 1.0.
    #[test]
    fn test_confidence_capped_at_one() {
        let executions = (0..100)
            .map(|i| ExecutionData {
                timestamp: Utc::now() - Duration::hours(i),
                duration_ms: 100,
                success: true,
            })
            .collect();

        let expertise = TaskTypeExpertise::from_executions(executions, "coding");
        assert_eq!(expertise.confidence, 1.0);
    }

    // No executions: the neutral prior (0.5 success rate) applies and
    // confidence is zero.
    #[test]
    fn test_empty_executions() {
        let expertise = TaskTypeExpertise::from_executions(Vec::new(), "coding");
        assert_eq!(expertise.total_executions, 0);
        assert_eq!(expertise.success_rate, 0.5);
        assert_eq!(expertise.confidence, 0.0);
    }

    // Executions spanning two calendar days produce a non-empty curve
    // sorted by day in ascending order.
    #[test]
    fn test_learning_curve_generation() {
        let now = Utc::now();
        let executions = vec![
            ExecutionData {
                timestamp: now - Duration::hours(25),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::hours(24),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::hours(1),
                duration_ms: 100,
                success: false,
            },
        ];

        let curve = calculate_learning_curve(&executions);
        assert!(!curve.is_empty());
        // Earlier executions should have lower timestamps
        for i in 1..curve.len() {
            assert!(curve[i - 1].0 <= curve[i].0);
        }
    }
}
|