# Language Detector Module
# Analyzes code block content to detect programming language
# Used by fix-markdown-fences.nu for MD040 violation fixes

# Detect language based on content patterns, context, and file path.
#
# Detectors are tried in priority order and the first non-null answer wins.
# Returns "bash" when no detector matches.
export def detect-language [
    content_lines: list     # First 10 lines after opening fence
    context_before: string  # 3 lines before the code fence
    file_path: string       # Full file path
]: nothing -> string {
    # Priority 1: Content patterns (highest confidence)
    let lang_by_content = detect-by-content-pattern $content_lines
    if $lang_by_content != null {
        return $lang_by_content
    }

    # Priority 2: Context keywords (3 lines before fence)
    let lang_by_context = detect-by-context-keywords $context_before
    if $lang_by_context != null {
        return $lang_by_context
    }

    # Priority 3: File path hints
    let lang_by_path = detect-by-path $file_path
    if $lang_by_path != null {
        return $lang_by_path
    }

    # Priority 4: Command detection (starts with $, #, or common commands)
    let lang_by_command = detect-by-commands $content_lines
    if $lang_by_command != null {
        return $lang_by_command
    }

    # Priority 5: Fallback (most common in technical docs)
    "bash"
}

# Detect language by analyzing the first lines for specific patterns.
#
# Only the first line (trimmed) is matched, except for the Nickel/JSON
# decision which also scans the first 5 lines.
# Returns the language name, or null when nothing matches or content is empty.
def detect-by-content-pattern [content: list]: nothing -> any {
    if ($content | is-empty) {
        return null
    }

    let first_line = ($content | get 0 | str trim)

    # TOML: [section] pattern
    if ($first_line =~ '^\[.*\]$') {
        return "toml"
    }

    # YAML: starts with ---
    if ($first_line =~ '^---\s*$') {
        return "yaml"
    }

    # Nushell shebang; `nu` must be a whole word so interpreters such as
    # gnuplot are not mistaken for Nushell.
    if ($first_line =~ '^#!.*\bnu\b') {
        return "nushell"
    }

    # Bash/sh shebang, including the `#!/usr/bin/env bash` form
    if ($first_line =~ '^#!.*\b(bash|sh)\b') {
        return "bash"
    }

    # Rust: fn, impl, struct, trait
    if ($first_line =~ '^(fn|impl|struct|trait|pub)\s+') {
        return "rust"
    }

    # Nushell command definition: `def name [` / `export def name [`.
    # Checked before Python and JavaScript because a typed parameter list
    # contains ':' and `export` is also a JavaScript keyword.
    if ($first_line =~ '^(export\s+)?def\s+(--[\w\-]+\s+)*("[^"]+"|[\w\-]+)\s+\[') {
        return "nushell"
    }

    # Nickel: check for { with nickel-specific keywords in first 5 lines
    if ($first_line =~ '^\{') {
        # `first 5` returns the whole list when it has fewer than 5 items.
        let head_text = ($content | first 5 | str join "\n" | str downcase)
        if ($head_text =~ '(let\s+|import\s+|rec\s*\{|contract\s+)') {
            return "nickel"
        }
        # Otherwise likely JSON
        return "json"
    }

    # Python: def/class headers (which carry a ':'), plus import statements.
    # Imports have no ':' so they need their own pattern.
    let python_header = ($first_line =~ '^(def|class|import|from).*:')
    let python_import = ($first_line =~ '^(from\s+[\w.]+\s+import\s+|import\s+[\w.]+(\s+as\s+\w+)?(\s*,\s*[\w.]+(\s+as\s+\w+)?)*\s*$)')
    if $python_header or $python_import {
        return "python"
    }

    # JavaScript/TypeScript: function, const, var, async
    if ($first_line =~ '^(function|const|let|var|async|class|export)') {
        return "javascript"
    }

    null
}

# Detect language by keywords in the 3 lines before the fence.
#
# Matching is case-insensitive (the context is lowercased first).
# Returns the language name, or null when no keyword is present.
def detect-by-context-keywords [context: string]: nothing -> any {
    let lower_context = ($context | str downcase)

    # Nickel context
    if ($lower_context =~ '(nickel|\.ncl\b|nickel eval)') {
        return "nickel"
    }

    # TOML context
    if ($lower_context =~ '(toml|config\.toml|\.toml)') {
        return "toml"
    }

    # Nushell context
    if ($lower_context =~ '(nushell|nu script|\.nu\b|nu eval)') {
        return "nushell"
    }

    # Rust context; keywords must not be embedded in a longer word, so
    # "trusted" or "frustrating" do not count as a mention of Rust.
    if ($lower_context =~ '(^|[^a-z])(rust|cargo|rustc)([^a-z]|$)|\.rs\b') {
        return "rust"
    }

    # YAML context
    if ($lower_context =~ '(yaml|kubernetes|k8s|\.yaml|\.yml)') {
        return "yaml"
    }

    # Nickel/TOML/INI configuration
    if ($lower_context =~ '(config|configuration|settings)') {
        return "toml"
    }

    null
}

# Detect language by file path patterns.
#
# Matching is case-insensitive. Extension patterns end on a word boundary so
# that `.rs` does not match `.rst` and `.k` does not match `.kubernetes`.
# Returns the language name, or null when the path gives no hint.
def detect-by-path [file_path: string]: nothing -> any {
    let lower_path = ($file_path | str downcase)

    # .typedialog files are TOML-based
    if ($lower_path =~ '\.typedialog') {
        return "toml"
    }

    # Nickel-specific paths
    if ($lower_path =~ '(nickel|\.ncl\b)') {
        return "nickel"
    }

    # Rust documentation ("rust" must not be part of a longer word like "trust")
    if ($lower_path =~ '(^|[^a-z])rust([^a-z]|$)|\.rs\b') {
        return "rust"
    }

    # Nushell documentation
    if ($lower_path =~ '(nushell|\.nu\b)') {
        return "nushell"
    }

    # KCL (legacy, but still in docs)
    if ($lower_path =~ '(kcl|\.k\b)') {
        return "text"
    }

    null
}

# Detect language by command patterns ($ prefix, # prefix, common commands).
#
# Only the first line (trimmed) is inspected.
# Returns "bash" or "nushell", or null when the line does not look like a command.
def detect-by-commands [content: list]: nothing -> any {
    if ($content | is-empty) {
        return null
    }

    let first_line = ($content | get 0 | str trim)

    # Command prompt indicators
    if ($first_line =~ '^(\$|\#)') {
        return "bash"
    }

    # Common command prefixes
    if ($first_line =~ '^(cargo|docker|kubectl|git|make|npm|yarn|pnpm)') {
        return "bash"
    }

    # Provisioning CLI
    if ($first_line =~ '^provisioning') {
        return "bash"
    }

    # Nushell-specific syntax in command form
    if ($first_line =~ '^nu ') {
        return "nushell"
    }

    null
}