103 lines
5.7 KiB
Plaintext
Raw Normal View History

# RAG System Validator
let rag_schema = import "../schemas/rag.ncl" in
let constraints = import "../constraints/constraints.toml" in
{
  # Validate the cross-field rules of a RAG configuration.
  #
  # Takes a configuration guarded by the `rag_schema.RagConfig` contract and
  # returns an array of human-readable error messages, grouped in this order:
  # embeddings, vector DB, LLM, retrieval, ingestion. An empty array means
  # that no rule was violated.
  #
  # When `config.rag.enabled` is not `true`, no rule is checked and the result
  # is always `[]`. A subsystem whose section is `null` is skipped as well.
  validate_rag_config | rag_schema.RagConfig -> Array String = fun config =>
    # One-element array holding `message` when `failed` is true, else `[]`.
    # `message` is only evaluated when the check actually fails.
    let check = fun failed message => if failed then [message] else [] in
    # Apply `validate` to a subsystem section unless that section is null.
    let section = fun value validate =>
      if value != null then validate value else []
    in
    # Providers for which a missing API key is reported.
    let needs_api_key = fun provider =>
      provider == "openai" || provider == "anthropic"
    in
    # Vector DB backends for which a missing or empty URL is reported.
    let needs_url = fun db_type =>
      db_type == "surrealdb" || db_type == "qdrant" || db_type == "milvus"
    in
    if config.rag.enabled != true then
      # Disabled RAG: subsystem settings are not validated at all.
      []
    else
      std.array.flatten [
        # Embeddings validation
        section config.embeddings (fun embeddings =>
          std.array.flatten [
            # NOTE(review): `model` is assumed to be a string (the schema is
            # not visible here), so emptiness is tested against "" instead of
            # with `std.array.length`, which only accepts arrays.
            check (embeddings.model == "") "Embeddings model cannot be empty",
            check (embeddings.dimension < 1) "Embeddings dimension must be positive",
            # `&&` short-circuits: `api_key` is only read for listed providers.
            check
              (needs_api_key embeddings.provider && std.is_null embeddings.api_key)
              "API key required for %{embeddings.provider} embeddings",
          ]
        ),
        # Vector DB validation
        section config.vector_db (fun vector_db =>
          std.array.flatten [
            check
              (needs_url vector_db.db_type
                && (std.is_null vector_db.url || vector_db.url == ""))
              "Database URL required for %{vector_db.db_type}",
            # NOTE(review): `namespace` is assumed to be a string, see above.
            check (vector_db.namespace == "") "Namespace cannot be empty",
          ]
        ),
        # LLM validation
        section config.llm (fun llm =>
          std.array.flatten [
            # NOTE(review): `model` is assumed to be a string, see above.
            check (llm.model == "") "LLM model cannot be empty",
            check
              (needs_api_key llm.provider && std.is_null llm.api_key)
              "API key required for %{llm.provider} LLM",
            check
              (llm.temperature < 0.0 || llm.temperature > 1.0)
              "Temperature must be between 0.0 and 1.0",
            check (llm.max_tokens < 1) "Max tokens must be positive",
          ]
        ),
        # Retrieval validation
        section config.retrieval (fun retrieval =>
          std.array.flatten [
            check (retrieval.top_k < 1) "Top K must be at least 1",
            check
              (retrieval.similarity_threshold < 0.0
                || retrieval.similarity_threshold > 1.0)
              "Similarity threshold must be between 0.0 and 1.0",
          ]
        ),
        # Ingestion validation
        section config.ingestion (fun ingestion =>
          std.array.flatten [
            check (ingestion.chunk_size < 1) "Chunk size must be positive",
            check
              (ingestion.overlap >= ingestion.chunk_size)
              "Overlap must be less than chunk size",
            check
              (std.array.length ingestion.doc_types == 0)
              "At least one document type must be specified",
          ]
        ),
      ],
}