143 lines
4.6 KiB
Plaintext
143 lines
4.6 KiB
Plaintext
# Language Detector Module
|
|
# Analyzes code block content to detect programming language
|
|
# Used by fix-markdown-fences.nu for MD040 violation fixes
|
|
|
|
# Choose a fence language for a code block from its content, context and file path
#
# Detectors run in priority order: content patterns, keywords in the text before
# the fence, file path hints, then command prefixes. The first detector that
# returns a non-null value decides the result; if all of them return null the
# answer is "bash".
#
# NOTE(review): the `-> string` return annotation is kept exactly as written;
# confirm it parses on the Nushell version this project targets.
export def detect-language [
    content_lines: list<string>   # First 10 lines after opening fence
    context_before: string        # 3 lines before the code fence
    file_path: string             # Full file path
] -> string {
    # Closures keep evaluation lazy: a lower-priority detector only runs when
    # every detector listed before it came back with null.
    let detectors = [
        {|| detect-by-content-pattern $content_lines }    # Priority 1: content patterns
        {|| detect-by-context-keywords $context_before }  # Priority 2: context keywords
        {|| detect-by-path $file_path }                   # Priority 3: file path hints
        {|| detect-by-commands $content_lines }           # Priority 4: command detection
    ]

    for detector in $detectors {
        let guess = (do $detector)
        if $guess != null { return $guess }
    }

    # Priority 5: fallback when nothing matched
    "bash"
}
|
|
|
|
# Detect language by matching the block's first line against known patterns
#
# Only the first line (trimmed) is pattern-matched, except for blocks opening
# with `{`, where the first 5 lines are scanned for Nickel keywords to tell
# Nickel apart from JSON. Checks run top to bottom and the first hit wins.
#
# Returns the language name, or null when the list is empty or nothing matches.
def detect-by-content-pattern [content: list<string>] -> string | null {
    if ($content | is-empty) { return null }

    let first_line = ($content | get 0 | str trim)

    # TOML: [section] pattern
    if ($first_line =~ '^\[.*\]$') { return "toml" }

    # YAML: starts with ---
    if ($first_line =~ '^---\s*$') { return "yaml" }

    # Nushell shebang (checked before bash; "#!/bin/bash" contains no "nu")
    if ($first_line =~ '^#!.*nu') { return "nushell" }

    # Bash shebang
    if ($first_line =~ '^#!/bin/(bash|sh)') { return "bash" }

    # Rust: fn, impl, struct, trait, pub
    if ($first_line =~ '^(fn|impl|struct|trait|pub)\s+') { return "rust" }

    # Nickel vs JSON: both open with `{`, so look for Nickel keywords
    if ($first_line =~ '^\{') {
        # `take 5` yields at most 5 lines and is safe on shorter blocks.
        # The previous slice relied on `$it` outside a row condition and on
        # `get` with a range, neither of which is valid here.
        let head_text = ($content | take 5 | str join "\n" | str downcase)

        if ($head_text =~ '(let\s+|import\s+|rec\s*\{|contract\s+)') {
            return "nickel"
        }

        # Otherwise likely JSON
        return "json"
    }

    # Python: def/class headers (the line carries a colon)
    if ($first_line =~ '^(def|class|import|from).*:') { return "python" }

    # Python imports never contain a colon, so the pattern above cannot see
    # them. Match `from x import y` and bare `import a.b [as c]`; the end
    # anchor keeps `import x from 'y'` (JavaScript) and `import "f"` out.
    if (($first_line =~ '^from\s+\S+\s+import\s+\S') or ($first_line =~ '^import\s+[\w.]+(\s+as\s+\w+)?\s*$')) {
        return "python"
    }

    # JavaScript/TypeScript: function, const, let, var, async, class, export
    if ($first_line =~ '^(function|const|let|var|async|class|export)') { return "javascript" }

    null
}
|
|
|
|
# Detect language from keywords in the text that precedes the fence
#
# The context is lower-cased and tested against keyword groups in a fixed
# order; the first group that matches decides the language. Returns null when
# no group matches.
#
# Short keywords are anchored so they do not fire inside longer words:
#   - `(^|[^a-z])` in front of a word rejects a preceding letter
#     ("trust" is not "rust", "menu script" is not "nu script")
#   - `\b` after a file extension rejects longer extensions
#     (".number" is not ".nu", ".rst" is not ".rs")
def detect-by-context-keywords [context: string] -> string | null {
    let lower_context = ($context | str downcase)

    # Nickel context
    if ($lower_context =~ '(nickel|\.ncl\b)') { return "nickel" }

    # TOML context ("config.toml" and ".toml" both contain "toml")
    if ($lower_context =~ 'toml') { return "toml" }

    # Nushell context
    if ($lower_context =~ '(nushell|(^|[^a-z])nu script|\.nu\b|(^|[^a-z])nu eval)') { return "nushell" }

    # Rust context (prefix match keeps rustc, rustup, rustfmt)
    if ($lower_context =~ '((^|[^a-z])rust|(^|[^a-z])cargo|\.rs\b)') { return "rust" }

    # YAML context
    if ($lower_context =~ '(yaml|kubernetes|k8s|\.yml\b)') { return "yaml" }

    # Generic configuration wording defaults to TOML
    # ("configuration" already contains "config")
    if ($lower_context =~ '(config|settings)') { return "toml" }

    null
}
|
|
|
|
# Detect language from hints in the path of the file being processed
#
# The path is lower-cased and tested against the patterns below in order; the
# first match wins. Returns null when the path carries no hint.
#
# File extensions are anchored with `\b` so that ".k" does not match ".kube",
# ".nu" does not match ".nuget" and ".rs" does not match ".rst". "rust" must
# not follow a letter, so "trusted" is not taken for Rust while "my_rust" and
# "docs/rust" still are.
def detect-by-path [file_path: string] -> string | null {
    let lower_path = ($file_path | str downcase)

    # .typedialog files are TOML-based
    if ($lower_path =~ '\.typedialog') { return "toml" }

    # Nickel-specific paths
    if ($lower_path =~ '(nickel|\.ncl\b)') { return "nickel" }

    # Rust documentation
    if ($lower_path =~ '((^|[^a-z])rust|\.rs\b)') { return "rust" }

    # Nushell documentation
    if ($lower_path =~ '(nushell|\.nu\b)') { return "nushell" }

    # KCL (legacy, but still in docs) is tagged as plain text
    if ($lower_path =~ '(kcl|\.k\b)') { return "text" }

    null
}
|
|
|
|
# Detect shell-like blocks from the way their first line begins
#
# The first line is trimmed and checked against an ordered rule table; the
# language of the first rule whose pattern matches is returned. Returns null
# for an empty block or when no rule matches.
def detect-by-commands [content: list<string>] -> string | null {
    if ($content | is-empty) { return null }

    let head = ($content | first | str trim)

    # Order matters: earlier rows take precedence over later ones.
    let rules = [
        [pattern lang];
        # Command prompt indicators
        ['^(\$|\#)' "bash"]
        # Common command prefixes
        ['^(cargo|cargo_add|docker|kubectl|git|make|npm|yarn|pnpm)' "bash"]
        # Provisioning CLI
        ['^provisioning' "bash"]
        # Nushell invoked as a command
        ['^nu ' "nushell"]
    ]

    for rule in $rules {
        if ($head =~ $rule.pattern) { return $rule.lang }
    }

    null
}
|