#!/usr/bin/env nu
# Rebuild embeddings index for KOGRAL
#
# Usage: nu kogral-reindex.nu [--provider <name>] [--batch-size <n>] [--dry-run] [--kogral-dir <path>] [--force]

# Rebuild the KOGRAL embeddings index.
#
# Validates the KOGRAL directory and its config.toml, collects the markdown
# files under the known node folders, and feeds them to the provider in
# non-overlapping batches. Exits with status 1 on a setup error or when at
# least one file could not be embedded.
def main [
    --provider: string = "fastembed"   # Embedding provider (fastembed, openai, claude, ollama)
    --batch-size: int = 10             # Number of nodes to process at once (must be >= 1)
    --dry-run                          # Show what would be indexed without making changes
    --kogral-dir: string = ".kogral"   # KOGRAL directory
    --force                            # Reindex even if embeddings are disabled in config.toml
] {
    print $"(ansi green_bold)KOGRAL Reindexing(ansi reset)"
    print $"Provider: ($provider)"
    print $"Batch size: ($batch_size)"
    print $"KOGRAL Directory: ($kogral_dir)"

    if $dry_run {
        print $"(ansi yellow)DRY RUN MODE - No changes will be made(ansi reset)"
    }

    # A batch size below 1 would divide by zero when computing the batch count.
    if $batch_size < 1 {
        print $"(ansi red)Error: --batch-size must be at least 1, got ($batch_size)(ansi reset)"
        exit 1
    }

    # Check if .kogral directory exists
    if not ($kogral_dir | path exists) {
        print $"(ansi red)Error: KOGRAL directory not found: ($kogral_dir)(ansi reset)"
        exit 1
    }

    # Load configuration
    let config_path = $"($kogral_dir)/config.toml"
    if not ($config_path | path exists) {
        print $"(ansi red)Error: Config file not found: ($config_path)(ansi reset)"
        exit 1
    }

    # `open` already decodes .toml files by extension; read the raw text so
    # that `from toml` receives a string and the file is parsed exactly once.
    let config = (open --raw $config_path | from toml)

    # Check if embeddings are enabled
    if not ($config.embeddings?.enabled? | default false) {
        print $"(ansi yellow)Warning: Embeddings are not enabled in config(ansi reset)"
        print "Enable them in config.toml:"
        print "[embeddings]"
        print "enabled = true"
        print $"provider = \"($provider)\""

        if not $force {
            print $"\nUse --force to reindex anyway"
            exit 1
        }
    }

    # Count markdown files
    print $"\n(ansi cyan_bold)Scanning files...(ansi reset)"
    let files = (find_markdown_files $kogral_dir)
    let total_files = ($files | length)
    print $"Found ($total_files) markdown files"

    if $total_files == 0 {
        print $"\n(ansi yellow)No files to index(ansi reset)"
        exit 0
    }

    # Group files by type
    let by_type = ($files | group-by type | transpose type files)
    print $"\n(ansi cyan_bold)Files by type:(ansi reset)"
    for group in $by_type {
        let count = ($group.files | length)
        print $" ($group.type): ($count)"
    }

    # Calculate batches
    let num_batches = (($total_files / $batch_size) | math ceil | into int)
    # The opening paren is escaped: inside $"..." a bare "(es)" would be
    # evaluated as a subexpression instead of being printed.
    print $"\nWill process in ($num_batches) batch\(es) of ($batch_size)"

    if $dry_run {
        print $"\n(ansi yellow)[DRY RUN] Would process ($total_files) files(ansi reset)"
        exit 0
    }

    # Process embeddings
    print $"\n(ansi cyan_bold)Generating embeddings...(ansi reset)"

    # `window` slides by one element by default; a stride equal to the window
    # size gives disjoint batches, and --remainder keeps the final short batch.
    let batches = ($files | window $batch_size --stride $batch_size --remainder)

    mut succeeded = 0
    for entry in ($batches | enumerate) {
        print $"\nBatch ($entry.index + 1)/($num_batches):"
        let result = (process_batch $entry.item $provider)
        $succeeded = $succeeded + $result.succeeded
    }

    let failed = $total_files - $succeeded
    if $failed > 0 {
        print $"\n(ansi yellow_bold)⚠ Reindexing finished: ($failed) of ($total_files) files were not embedded(ansi reset)"
        exit 1
    }

    print $"\n(ansi green_bold)✓ Reindexing completed(ansi reset)"
    print $"Processed ($total_files) files"
}

# Collect the markdown files under the known KOGRAL node folders.
#
# Returns a list of records `{ path, type }`, ordered notes, decisions,
# guidelines, patterns, journal. A folder with no matches contributes nothing.
def find_markdown_files [kogral_dir: string] {
    # Folder name on disk -> node type label attached to each file.
    let kinds = [
        [dir type];
        ["notes" "note"]
        ["decisions" "decision"]
        ["guidelines" "guideline"]
        ["patterns" "pattern"]
        ["journal" "journal"]
    ]

    $kinds | reduce --fold [] { |kind, found|
        $found | append (
            glob $"($kogral_dir)/($kind.dir)/**/*.md"
            | each { |file| { path: $file, type: $kind.type } }
        )
    }
}

# Process one batch of `{ path, type }` records with the given provider.
#
# A failure on one file (unreadable file, provider not ready, CLI error) is
# reported and counted, and the batch continues with the next file.
# Returns a record `{ processed, succeeded }`.
def process_batch [batch: list, provider: string] {
    mut processed = 0
    mut succeeded = 0

    for file in $batch {
        let filename = ($file.path | path basename)
        print $" Processing ($filename) [($file.type)]..."

        let ok = (try {
            # Phase 1: Load the markdown file as raw text and split it into lines
            let lines = (open --raw $file.path | lines)

            # Extract title from frontmatter
            let title = (extract_title_from_lines $lines)

            # Phase 2: Generate embedding via kogral CLI
            generate_embedding_for_file $file.path $title $provider
        } catch { |err|
            print $" (ansi red)✗ Failed: ($err.msg)(ansi reset)"
            false
        })

        $processed = $processed + 1
        if $ok {
            $succeeded = $succeeded + 1
        }

        # Rate limiting: short delay between provider calls
        sleep 50ms
    }

    print $" (ansi green)✓ Batch completed: ($succeeded)/($processed) succeeded(ansi reset)"

    { processed: $processed, succeeded: $succeeded }
}

# Extract the title from a file's lines.
#
# Uses the first line that starts with `title:` (format: `title: Example Title`)
# and returns the text after the prefix, trimmed. Returns "Unknown" when no
# such line exists or when the title is empty.
def extract_title_from_lines [lines: list] {
    for line in $lines {
        if ($line =~ '^title:') {
            # --regex is required: without it the pattern is matched literally
            # and the "title:" prefix is never removed.
            let title = ($line | str replace --regex '^title:\s*' '' | str trim)
            if ($title | is-empty) {
                return "Unknown"
            }
            return $title
        }
    }
    "Unknown"
}

# Run the kogral CLI for one file using the selected provider.
#
# Checks the provider's prerequisites (API key or local server), then runs
# `kogral search <title> --limit 1`. Returns true only when the prerequisites
# are met and the command exits with status 0; otherwise prints the reason
# and returns false. `file_path` is accepted for interface stability but is
# not used by the current implementation.
def generate_embedding_for_file [file_path: string, title: string, provider: string] {
    # Phase 1: Provider-specific readiness check
    let ready = (match $provider {
        # Local fastembed model (no API calls needed)
        "fastembed" => true
        # OpenAI API requires credentials
        "openai" => {
            if ($env.OPENAI_API_KEY? | is-empty) {
                print $" (ansi yellow)⚠ OpenAI: OPENAI_API_KEY not set(ansi reset)"
                false
            } else {
                true
            }
        }
        # Claude API requires credentials
        "claude" => {
            if ($env.ANTHROPIC_API_KEY? | is-empty) {
                print $" (ansi yellow)⚠ Claude: ANTHROPIC_API_KEY not set(ansi reset)"
                false
            } else {
                true
            }
        }
        # Ollama local server
        "ollama" => {
            if (check_ollama_available) {
                true
            } else {
                print $" (ansi yellow)⚠ Ollama: Server not available at localhost:11434(ansi reset)"
                false
            }
        }
        _ => {
            print $" (ansi red)✗ Unknown provider: ($provider)(ansi reset)"
            false
        }
    })

    if not $ready {
        return false
    }

    # Phase 2: Invoke the CLI and capture its exit status instead of discarding it
    let result = (^kogral search $title --limit 1 | complete)
    if $result.exit_code != 0 {
        let detail = ($result.stderr | default "" | str trim)
        print $" (ansi red)✗ kogral exited with code ($result.exit_code): ($detail)(ansi reset)"
        return false
    }

    print $" (ansi green)✓ Embedding generated via ($provider)(ansi reset)"
    true
}

# Report whether the Ollama server is reachable.
#
# Placeholder: no connection is attempted and the result is always true.
# A real health check still needs to be implemented.
def check_ollama_available [] {
    true
}