provisioning/scripts/fix-markdown-fences.nu
2026-01-14 02:59:52 +00:00

340 lines
11 KiB
Plaintext

#!/usr/bin/env nu
# Fix Markdown Code Fence Violations (MD040 + CommonMark)
# Handles both closing fences with language specifiers and opening fences without language
#
# Usage:
# nu fix-markdown-fences.nu # Fix all violations
# nu fix-markdown-fences.nu --dry-run # Preview without changes
# nu fix-markdown-fences.nu --phase closing # Only fix closing fences
# nu fix-markdown-fences.nu --phase opening # Only fix opening fences (requires language detection)
# nu fix-markdown-fences.nu --dry-run --report # Show detailed report
#
# Phases:
# closing - Fix malformed closing fences (remove language specifiers)
# opening - Add language specifiers to opening fences (requires manual review)
# all - Do both phases (default)
# Entry point: scan markdown files and repair code-fence violations.
#
# For every file returned by discover-markdown-files the selected phases are
# applied in order (closing first, then opening). A file is counted as
# modified when the resulting text differs from what was read; it is only
# written back when --dry-run is not set.
def main [
    --dry-run               # Preview without making changes
    --phase: string = "all" # Phase: closing|opening|all
    --report                # Show detailed before/after report
] {
    # Validate phase argument
    if $phase !~ '^(closing|opening|all)$' {
        # Nushell interpolates only parenthesised expressions inside $"...",
        # so the value has to be written as ($phase) to show up in the message.
        print $"Error: Invalid phase '($phase)'. Must be: closing, opening, or all"
        exit 1
    }

    print "🔍 Markdown Code Fence Violation Fixer"
    print ""
    let mode = if $dry_run { 'DRY-RUN (no changes)' } else { 'REAL MODE' }
    print $"Mode: ($mode)"
    print $"Phase: ($phase | str capitalize)"
    print ""

    # Discover all markdown files
    let md_files = discover-markdown-files
    print $"📄 Found ($md_files | length) markdown files"
    print ""

    # Track statistics
    mut total_closing_fixed = 0
    mut total_opening_fixed = 0
    mut files_modified = 0

    # Process files
    for file in $md_files {
        # --raw guarantees plain text regardless of any `from <ext>` converter in scope
        let original_content = open --raw $file
        mut modified_content = $original_content
        mut closing_fixed = 0
        mut opening_fixed = 0

        # Phase 1: Fix closing fences
        if ($phase == "closing" or $phase == "all") {
            let closing_result = fix-closing-fences $modified_content
            $closing_fixed = $closing_result.fixed_count
            $total_closing_fixed += $closing_fixed
            $modified_content = $closing_result.content
        }

        # Phase 2: Fix opening fences with language detection
        if ($phase == "opening" or $phase == "all") {
            let opening_result = fix-opening-fences $modified_content $file
            $opening_fixed = $opening_result.fixed_count
            $total_opening_fixed += $opening_fixed
            $modified_content = $opening_result.content
        }

        # Write changes if not dry-run
        if ($modified_content != $original_content) {
            if (not $dry_run) {
                $modified_content | save --force $file
            }
            $files_modified += 1
            if $report {
                print $"✏️ Modified: ($file)"
                if $closing_fixed > 0 {
                    print $" Closing fences fixed: ($closing_fixed)"
                }
                if $opening_fixed > 0 {
                    print $" Opening fences fixed: ($opening_fixed)"
                }
            }
        }
    }

    print ""
    print "═════════════════════════════════════"
    print "Summary"
    print "═════════════════════════════════════"
    print $"Files modified: ($files_modified)"
    print $"Closing fences fixed: ($total_closing_fixed)"
    print $"Opening fences fixed: ($total_opening_fixed)"
    print ""
    if $dry_run {
        print "⚠️ DRY-RUN MODE: No files were actually modified"
        print "Run without --dry-run to apply changes"
    } else {
        print "✅ Changes applied successfully"
        print "Run: git diff # Review changes"
        print "Run: nu scripts/check-malformed-fences.nu # Validate closing fences"
        print "Run: markdownlint-cli2 '**/*.md' # Validate opening fences (MD040)"
    }
}
# Discover all markdown files below the current directory, with exclusions.
#
# Returns the sorted list of paths produced by `glob **/*.md`, minus any whose
# path (relative to the working directory) matches the exclusion pattern.
# Matching on the relative path keeps the name of a parent directory outside
# the project (e.g. a checkout under ".../build/") from excluding every file.
def discover-markdown-files [] {
    let root = $env.PWD
    glob **/*.md
    | where {|f|
        # Fall back to the path as returned if it is not located under $root
        let rel = try { $f | path relative-to $root } catch { $f }
        # Exclude various non-doc directories
        $rel !~ '(node_modules|target|build|dist|\.git|\.vale|\.coder|\.claude|\.wrks|old_config)'
    }
    | sort
}
# Fix malformed closing fences (remove language specifiers)
# Based on check-malformed-fences.nu logic
#
# Parameters:
#   content - full text of a markdown document
# Returns: {content: string, fixed_count: int}
#
# Only fences opened WITH a language are tracked; a bare ``` seen while no
# fence is open is passed through without changing the fence state.
def fix-closing-fences [content] {
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for line in ($content | lines) {
        if ($line =~ '^```') {
            if (not $in_fence) {
                # Opening fence - only enter fence state when it carries a language
                if ($line =~ '^```\w+') {
                    $in_fence = true
                }
                $fixed_lines = ($fixed_lines | append $line)
            } else {
                # Inside a fence, so this line closes it
                if ($line =~ '^```\w+\s*$') {
                    # Malformed: closing fence has language → strip it
                    $fixed_lines = ($fixed_lines | append '```')
                    $fixed_count += 1
                } else {
                    # Correct: closing fence without language
                    $fixed_lines = ($fixed_lines | append $line)
                }
                # Always reset fence state when we see any ``` while in fence
                $in_fence = false
            }
        } else {
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    # Double quotes are required: a single-quoted '\n' is a literal backslash + n.
    let joined = $fixed_lines | str join "\n"
    # `lines` drops the final line terminator; put it back so an otherwise
    # untouched document compares equal to its original text.
    let rebuilt = if ($content | str ends-with "\n") { $"($joined)\n" } else { $joined }

    {
        content: $rebuilt
        fixed_count: $fixed_count
    }
}
# Fix opening fences by adding language specifiers
#
# Parameters:
#   content   - full text of a markdown document
#   file_path - path of the document, forwarded to detect-language as a hint
# Returns: {content: string, fixed_count: int}
def fix-opening-fences [content, file_path] {
    let lines = $content | lines
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    # enumerate yields exactly one entry per line, so the index can never run
    # past the end of the list (an inclusive 0..length range would).
    for entry in ($lines | enumerate) {
        let idx = $entry.index
        let line = $entry.item

        if (($line =~ '^```$') and (not $in_fence)) {
            # Opening fence without language.
            # Look ahead at the block body: stop at the next fence line and
            # keep at most 10 lines, so text after the block is never inspected.
            let content_after = ($lines | skip ($idx + 1) | take until {|l| $l =~ '^```' } | take 10)

            # Context before the fence (up to 3 lines)
            let context_start = if ($idx > 3) { $idx - 3 } else { 0 }
            let context_before = ($lines | skip $context_start | take ($idx - $context_start) | str join "\n")

            # Detect language
            let detected_lang = detect-language $content_after $context_before $file_path

            # Add language to fence; interpolation needs $"..." with (expr)
            $fixed_lines = ($fixed_lines | append $"```($detected_lang)")
            $fixed_count += 1
            $in_fence = true
        } else if ($line =~ '^```') {
            # Any other fence line toggles the fence state and is kept as-is
            $in_fence = (not $in_fence)
            $fixed_lines = ($fixed_lines | append $line)
        } else {
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    # Double quotes are required: a single-quoted '\n' is a literal backslash + n.
    let joined = $fixed_lines | str join "\n"
    # `lines` drops the final line terminator; put it back so an otherwise
    # untouched document compares equal to its original text.
    let rebuilt = if ($content | str ends-with "\n") { $"($joined)\n" } else { $joined }

    {
        content: $rebuilt
        fixed_count: $fixed_count
    }
}
# Pick a fence language from the block content, the text before the fence,
# and the file path.
#
# Detectors are consulted in priority order and the first non-null answer is
# returned; "bash" is the fallback when none of them has an opinion.
def detect-language [
    content_lines
    context_before
    file_path
] {
    # Wrapped in closures so a detector only runs if every earlier one returned null
    let detectors = [
        {|| detect-by-content-pattern $content_lines }    # 1: content patterns
        {|| detect-by-context-keywords $context_before }  # 2: keywords before the fence
        {|| detect-by-path $file_path }                   # 3: file path hints
        {|| detect-by-commands $content_lines }           # 4: command detection
    ]

    for detector in $detectors {
        let guess = do $detector
        if $guess != null { return $guess }
    }

    # 5: fallback
    "bash"
}
# Detect language by analyzing the first lines of a block for specific patterns
#
# Parameters:
#   content - list of lines taken from the start of the code block
# Returns: a language name, or null when no pattern matches.
#
# Checks run in order against the trimmed first line; only the Nickel/JSON
# branch looks further (at the first 5 lines).
def detect-by-content-pattern [content] {
    if ($content | is-empty) { return null }
    let first_line = ($content | get 0 | str trim)

    # TOML: [section] pattern
    if ($first_line =~ '^\[.*\]$') { return "toml" }
    # YAML: starts with ---
    if ($first_line =~ '^---\s*$') { return "yaml" }
    # Nushell shebang
    if ($first_line =~ '^#!.*nu') { return "nushell" }
    # Bash shebang
    if ($first_line =~ '^#!/bin/(bash|sh)') { return "bash" }
    # Rust: fn, impl, struct, trait, pub
    if ($first_line =~ '^(fn|impl|struct|trait|pub)\s+') { return "rust" }

    # Nickel or JSON: check for { with nickel-specific keywords
    if ($first_line =~ '^\{') {
        let nickel_keywords = ($content | first 5 | str join "\n" | str downcase)
        if ($nickel_keywords =~ '(let\s+|import\s+|rec\s*\{|contract\s+)') {
            return "nickel"
        }
        return "json"
    }

    # Python: `def`/`class` must be whole words followed by a colon, so that
    # key/value lines such as "default: x" or "classes:" are not misread.
    if ($first_line =~ '^(def|class)\s.*:') { return "python" }
    # Python: from-import and plain import statements (these carry no colon)
    if ($first_line =~ '^from\s+\S+\s+import\s') { return "python" }
    if ($first_line =~ '^import\s+[\w.]+(\s+as\s+\w+)?(\s*,\s*[\w.]+(\s+as\s+\w+)?)*\s*(#.*)?$') { return "python" }

    # JavaScript/TypeScript keywords; \b keeps prose such as "letter" or
    # "various" from matching on a keyword prefix.
    if ($first_line =~ '^(function|const|let|var|async|class|export)\b') { return "javascript" }

    null
}
# Guess a language from keywords in the text that precedes a fence.
#
# Parameters:
#   context - the lines before the fence, as a single string
# Returns: the language of the first rule whose pattern matches the
# lower-cased context, or null when none does.
def detect-by-context-keywords [context] {
    # Ordered: earlier rules win when several patterns match
    let rules = [
        { pattern: '(nickel|\.ncl|nickel eval)', lang: "nickel" }
        { pattern: '(toml|config\.toml|\.toml)', lang: "toml" }
        { pattern: '(nushell|nu script|\.nu|nu eval)', lang: "nushell" }
        { pattern: '(rust|cargo|rustc|\.rs)', lang: "rust" }
        { pattern: '(yaml|kubernetes|k8s|\.yaml|\.yml)', lang: "yaml" }
        # Generic configuration wording is treated as TOML
        { pattern: '(config|configuration|settings)', lang: "toml" }
    ]

    let haystack = ($context | str downcase)
    for rule in $rules {
        if ($haystack =~ $rule.pattern) { return $rule.lang }
    }

    null
}
# Guess a language from substrings of the document's file path.
#
# Parameters:
#   file_path - path of the markdown file being processed
# Returns: the language of the first rule whose pattern matches the
# lower-cased path, or null when none does.
def detect-by-path [file_path] {
    # Ordered: earlier rules win when several patterns match
    let rules = [
        # .typedialog files are TOML-based
        { pattern: '\.typedialog', lang: "toml" }
        { pattern: '(nickel|\.ncl)', lang: "nickel" }
        { pattern: '(rust|\.rs)', lang: "rust" }
        { pattern: '(nushell|\.nu)', lang: "nushell" }
    ]

    let haystack = ($file_path | str downcase)
    for rule in $rules {
        if ($haystack =~ $rule.pattern) { return $rule.lang }
    }

    null
}
# Guess a language from how the first line of a block starts.
#
# Parameters:
#   content - list of lines taken from the start of the code block
# Returns: "bash" for a prompt marker or a known command prefix, "nushell"
# for a line starting with "nu ", otherwise null (also for empty input).
def detect-by-commands [content] {
    if ($content | is-empty) { return null }

    let head = ($content.0 | str trim)

    # Prompt indicators ($ or #) and common command prefixes both mean shell
    let looks_like_shell = (
        ($head =~ '^(\$|\#)')
        or ($head =~ '^(cargo|docker|kubectl|git|make|npm|yarn|pnpm|provisioning)')
    )

    if $looks_like_shell {
        "bash"
    } else if ($head =~ '^nu ') {
        "nushell"
    } else {
        null
    }
}