// Commit note (preserved from VCS for context):
// - Exclude problematic markdown files from linting (existing legacy issues)
// - Make clippy check less aggressive (warnings only, not -D warnings)
// - Move cargo test to manual stage (too slow for pre-commit)
// - Exclude SVG files from end-of-file-fixer and trailing-whitespace
// - Add markdown linting exclusions for existing documentation
// This allows pre-commit hooks to run successfully on new code without blocking
// commits due to existing issues in legacy documentation files.
use chrono::{DateTime, Utc};
use std::collections::HashMap;

#[cfg(test)]
use chrono::Duration;
/// Per-task-type expertise tracking for agents with recency bias.
/// Recent performance (last 7 days) weighted 3x higher than historical averages.
#[derive(Debug, Clone)]
pub struct LearningProfile {
    /// Identifier of the agent this profile describes.
    pub agent_id: String,
    /// Expertise metrics keyed by task-type name.
    pub task_type_expertise: HashMap<String, TaskTypeExpertise>,
    /// Timestamp of the most recent mutation to this profile
    /// (set on creation and on every `set_task_type_expertise`).
    pub last_updated: DateTime<Utc>,
}
/// Task-specific expertise metrics with learning curves.
#[derive(Debug, Clone)]
pub struct TaskTypeExpertise {
    /// Overall success rate (0.0-1.0) including all historical data.
    pub success_rate: f64,
    /// Total number of executions for this task type.
    pub total_executions: u32,
    /// Success rate for last 7 days with recency bias applied.
    /// Recent performance weighted 3x higher than older data.
    pub recent_success_rate: f64,
    /// Average duration in milliseconds.
    pub avg_duration_ms: f64,
    /// Time-series of expertise evolution as (timestamp, success_rate).
    /// Computed by aggregating executions into daily/weekly windows.
    pub learning_curve: Vec<(DateTime<Utc>, f64)>,
    /// Confidence score (0.0-1.0) based on execution count.
    /// Prevents overfitting: min(1.0, total_executions / 20).
    pub confidence: f64,
}
impl LearningProfile {
|
|
/// Create new empty learning profile for agent.
|
|
pub fn new(agent_id: String) -> Self {
|
|
Self {
|
|
agent_id,
|
|
task_type_expertise: HashMap::new(),
|
|
last_updated: Utc::now(),
|
|
}
|
|
}
|
|
|
|
/// Add or update expertise for a task type.
|
|
pub fn set_task_type_expertise(&mut self, task_type: String, expertise: TaskTypeExpertise) {
|
|
self.task_type_expertise.insert(task_type, expertise);
|
|
self.last_updated = Utc::now();
|
|
}
|
|
|
|
/// Get expertise score for specific task type, default to 0.5 if unknown.
|
|
pub fn get_task_type_score(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| e.success_rate)
|
|
.unwrap_or(0.5)
|
|
}
|
|
|
|
/// Get recent success rate for task type (weighted with recency bias).
|
|
/// Returns recent_success_rate if available, falls back to overall success_rate.
|
|
pub fn get_recent_score(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| {
|
|
if e.total_executions >= 5 {
|
|
e.recent_success_rate
|
|
} else {
|
|
e.success_rate
|
|
}
|
|
})
|
|
.unwrap_or(0.5)
|
|
}
|
|
|
|
/// Get confidence score for task type (0.0-1.0 based on execution count).
|
|
pub fn get_confidence(&self, task_type: &str) -> f64 {
|
|
self.task_type_expertise
|
|
.get(task_type)
|
|
.map(|e| e.confidence)
|
|
.unwrap_or(0.0)
|
|
}
|
|
}
|
|
|
|
impl TaskTypeExpertise {
|
|
/// Create expertise metrics from execution data.
|
|
/// Calculates success_rate, confidence, and applies recency bias.
|
|
pub fn from_executions(executions: Vec<ExecutionData>, _task_type: &str) -> Self {
|
|
if executions.is_empty() {
|
|
return Self {
|
|
success_rate: 0.5,
|
|
total_executions: 0,
|
|
recent_success_rate: 0.5,
|
|
avg_duration_ms: 0.0,
|
|
learning_curve: Vec::new(),
|
|
confidence: 0.0,
|
|
};
|
|
}
|
|
|
|
let total_executions = executions.len() as u32;
|
|
let success_count = executions.iter().filter(|e| e.success).count() as u32;
|
|
let success_rate = success_count as f64 / total_executions as f64;
|
|
|
|
let total_duration: u64 = executions.iter().map(|e| e.duration_ms).sum();
|
|
let avg_duration_ms = total_duration as f64 / total_executions as f64;
|
|
|
|
let recent_success_rate = calculate_recency_weighted_success(&executions);
|
|
let confidence = (total_executions as f64 / 20.0).min(1.0);
|
|
|
|
let learning_curve = calculate_learning_curve(&executions);
|
|
|
|
Self {
|
|
success_rate,
|
|
total_executions,
|
|
recent_success_rate,
|
|
avg_duration_ms,
|
|
learning_curve,
|
|
confidence,
|
|
}
|
|
}
|
|
|
|
/// Update expertise with new execution result.
|
|
pub fn update_with_execution(&mut self, execution: &ExecutionData) {
|
|
let new_count = self.total_executions + 1;
|
|
let new_success_count = (self.success_rate * self.total_executions as f64).round() as u32
|
|
+ if execution.success { 1 } else { 0 };
|
|
self.success_rate = new_success_count as f64 / new_count as f64;
|
|
self.total_executions = new_count;
|
|
self.confidence = (new_count as f64 / 20.0).min(1.0);
|
|
|
|
let total_duration = self.avg_duration_ms * self.total_executions as f64
|
|
- self.avg_duration_ms
|
|
+ execution.duration_ms as f64;
|
|
self.avg_duration_ms = total_duration / new_count as f64;
|
|
}
|
|
}
|
|
|
|
/// Execution data for calculating expertise metrics.
#[derive(Debug, Clone)]
pub struct ExecutionData {
    /// UTC timestamp of the execution; compared against `Utc::now()` by the
    /// recency weighting. NOTE(review): whether this is start or completion
    /// time is not visible here — confirm with producers.
    pub timestamp: DateTime<Utc>,
    /// Duration of the execution in milliseconds.
    pub duration_ms: u64,
    /// Whether the execution succeeded.
    pub success: bool,
}
/// Calculate success rate with recency bias.
|
|
/// Last 7 days weighted 3x higher: weight = 3.0 * e^(-days_ago / 7.0).
|
|
fn calculate_recency_weighted_success(executions: &[ExecutionData]) -> f64 {
|
|
if executions.is_empty() {
|
|
return 0.5;
|
|
}
|
|
|
|
let now = Utc::now();
|
|
let mut weighted_success = 0.0;
|
|
let mut total_weight = 0.0;
|
|
|
|
for execution in executions {
|
|
let days_ago = (now - execution.timestamp).num_days() as f64;
|
|
let weight = if days_ago < 7.0 {
|
|
3.0 * (-days_ago / 7.0).exp()
|
|
} else {
|
|
(-days_ago / 7.0).exp()
|
|
};
|
|
|
|
weighted_success += weight * if execution.success { 1.0 } else { 0.0 };
|
|
total_weight += weight;
|
|
}
|
|
|
|
if total_weight > 0.0 {
|
|
weighted_success / total_weight
|
|
} else {
|
|
0.5
|
|
}
|
|
}
|
|
|
|
/// Calculate learning curve as time-series of expertise evolution.
|
|
/// Groups executions into daily windows and computes success rate per window.
|
|
fn calculate_learning_curve(executions: &[ExecutionData]) -> Vec<(DateTime<Utc>, f64)> {
|
|
if executions.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
|
|
let mut by_day: HashMap<DateTime<Utc>, (u32, u32)> = HashMap::new();
|
|
|
|
for execution in executions {
|
|
let day_start = execution
|
|
.timestamp
|
|
.date_naive()
|
|
.and_hms_opt(0, 0, 0)
|
|
.map(|dt| dt.and_utc())
|
|
.unwrap_or_else(|| execution.timestamp);
|
|
|
|
let (total, success) = by_day.entry(day_start).or_insert((0, 0));
|
|
*total += 1;
|
|
if execution.success {
|
|
*success += 1;
|
|
}
|
|
}
|
|
|
|
let mut curve: Vec<_> = by_day
|
|
.iter()
|
|
.map(|(day, (total, success))| (*day, *success as f64 / *total as f64))
|
|
.collect();
|
|
|
|
curve.sort_by_key(|entry| entry.0);
|
|
curve
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh profile carries the agent id and no expertise entries.
    #[test]
    fn test_learning_profile_creation() {
        let profile = LearningProfile::new("agent-1".to_string());
        assert_eq!(profile.agent_id, "agent-1");
        assert_eq!(profile.task_type_expertise.len(), 0);
    }

    /// Aggregation computes count, success rate, mean duration, confidence.
    #[test]
    fn test_task_type_expertise_from_executions() {
        let executions = vec![
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(1),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(2),
                duration_ms: 150,
                success: true,
            },
            ExecutionData {
                timestamp: Utc::now() - Duration::hours(3),
                duration_ms: 120,
                success: false,
            },
        ];

        let expertise = TaskTypeExpertise::from_executions(executions, "coding");
        assert_eq!(expertise.total_executions, 3);
        assert!((expertise.success_rate - 2.0 / 3.0).abs() < 0.01);
        assert!((expertise.avg_duration_ms - 123.33).abs() < 1.0);
        assert!((expertise.confidence - 0.15).abs() < 0.01); // 3/20 = 0.15
    }

    /// A success inside the 7-day window outweighs an older failure.
    #[test]
    fn test_recency_bias_weights_recent_higher() {
        let now = Utc::now();
        let executions = vec![
            ExecutionData {
                timestamp: now - Duration::hours(1),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::days(8),
                duration_ms: 100,
                success: false,
            },
        ];

        let recent = calculate_recency_weighted_success(&executions);
        assert!(recent > 0.5); // Recent success pulls average up
    }

    /// Confidence saturates at 1.0 once 20+ executions exist.
    #[test]
    fn test_confidence_capped_at_one() {
        let executions = (0..100)
            .map(|i| ExecutionData {
                timestamp: Utc::now() - Duration::hours(i),
                duration_ms: 100,
                success: true,
            })
            .collect();

        let expertise = TaskTypeExpertise::from_executions(executions, "coding");
        assert_eq!(expertise.confidence, 1.0);
    }

    /// Empty history yields the neutral prior with zero confidence.
    #[test]
    fn test_empty_executions() {
        let expertise = TaskTypeExpertise::from_executions(Vec::new(), "coding");
        assert_eq!(expertise.total_executions, 0);
        assert_eq!(expertise.success_rate, 0.5);
        assert_eq!(expertise.confidence, 0.0);
    }

    /// Curve buckets executions by day and is sorted ascending by timestamp.
    #[test]
    fn test_learning_curve_generation() {
        let now = Utc::now();
        let executions = vec![
            ExecutionData {
                timestamp: now - Duration::hours(25),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::hours(24),
                duration_ms: 100,
                success: true,
            },
            ExecutionData {
                timestamp: now - Duration::hours(1),
                duration_ms: 100,
                success: false,
            },
        ];

        let curve = calculate_learning_curve(&executions);
        // `curve.len() > 0` trips clippy::len_zero; is_empty is the idiom.
        assert!(!curve.is_empty());
        // Entries must be in ascending chronological order.
        for pair in curve.windows(2) {
            assert!(pair[0].0 <= pair[1].0);
        }
    }
}