provisioning/scripts/fix-markdown-fences.nu

#!/usr/bin/env nu
# Fix Markdown Issues: Newlines + Closing Code Fence Violations
# Handles:
#   1. Literal \n escape sequences → actual newlines
#   2. Closing fences with language specifiers (malformed)
#
# Usage:
#   nu fix-markdown-fences.nu                            # Fix all: run every phase
#   nu fix-markdown-fences.nu --dry-run                  # Preview without changes
#   nu fix-markdown-fences.nu --phase newlines           # Only fix literal \n
#   nu fix-markdown-fences.nu --phase closing            # Only fix closing fences
#   nu fix-markdown-fences.nu --dry-run --report         # Show detailed report
#
# For opening fences (manual):
#   find . -name '*.md' -exec sed -i '' 's/^```$/```text/g; s/^```[a-z]*$/```/g' {} \;
#
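# Phases:
#   cleanup  - Remove corrupted ```{$detected_lang} fence literals
#   newlines - Fix literal \n escape sequences → actual newlines
#   closing  - Fix malformed closing fences (remove language specifiers)
#   opening  - Add a detected language to opening fences that have none
#   all      - Run every phase in the order above (default)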

# Scan every discovered markdown file, run the selected fix phases over its
# content, and (unless --dry-run is given) write the fixed content back.
#
# Phases always run in the same order: cleanup, newlines, closing, opening.
# A file is rewritten only when at least one phase reports a fix.
# Exits with status 1 when --phase is not one of the accepted values.
def main [
    --dry-run                          # Preview without making changes
    --phase: string = "all"            # Phase: cleanup|newlines|closing|opening|all
    --report                           # Show detailed before/after report
] {
    # Validate phase argument
    if $phase !~ '^(cleanup|newlines|closing|opening|all)$' {
        # Interpolation requires parentheses; a bare $phase would be printed literally.
        print $"Error: Invalid phase '($phase)'. Must be: cleanup, newlines, closing, opening, or all"
        exit 1
    }

    print "🔍 Markdown Code Fence Violation Fixer"
    print ""
    let mode = if $dry_run { 'DRY-RUN (no changes)' } else { 'REAL MODE' }
    print $"Mode: ($mode)"
    print $"Phase: ($phase | str capitalize)"
    print ""

    # Discover all markdown files
    let md_files = discover-markdown-files
    print $"📄 Found ($md_files | length) markdown files"
    print ""

    # Track statistics
    mut total_cleanup_fixed = 0
    mut total_newlines_fixed = 0
    mut total_closing_fixed = 0
    mut total_opening_fixed = 0
    mut files_modified = 0

    # Process files
    for file in $md_files {
        # --raw guarantees plain text even if a `from md` converter is in scope.
        let original_content = open --raw $file
        mut modified_content = $original_content
        mut cleanup_fixed = 0
        mut newlines_fixed = 0
        mut closing_fixed = 0
        mut opening_fixed = 0

        # Phase -1: Clean up corrupted {$detected_lang} literals (CRITICAL)
        if ($phase == "cleanup" or $phase == "all") {
            let cleanup_result = cleanup-corrupted-fences $modified_content
            $cleanup_fixed = $cleanup_result.fixed_count
            $total_cleanup_fixed += $cleanup_fixed
            $modified_content = $cleanup_result.content
        }

        # Phase 0: Fix literal \n escape sequences (Nushell - SAFE)
        if ($phase == "newlines" or $phase == "all") {
            # Single quotes: this looks for a backslash followed by `n`, not a newline.
            if ($modified_content | str contains '\n') {
                let newlines_result = fix-literal-newlines $modified_content
                $newlines_fixed = $newlines_result.fixed_count
                $total_newlines_fixed += $newlines_fixed
                $modified_content = $newlines_result.content
            }
        }

        # Phase 1: Fix closing fences (Nushell - SAFE)
        if ($phase == "closing" or $phase == "all") {
            let closing_result = fix-closing-fences $modified_content
            $closing_fixed = $closing_result.fixed_count
            $total_closing_fixed += $closing_fixed
            $modified_content = $closing_result.content
        }

        # Phase 2: Fix opening fences with language detection
        if ($phase == "opening" or $phase == "all") {
            let opening_result = fix-opening-fences $modified_content $file
            $opening_fixed = $opening_result.fixed_count
            $total_opening_fixed += $opening_fixed
            $modified_content = $opening_result.content
        }

        # Write changes if not dry-run AND if there were any fixes
        let has_changes = ($cleanup_fixed > 0) or ($newlines_fixed > 0) or ($closing_fixed > 0) or ($opening_fixed > 0)
        if $has_changes {
            if (not $dry_run) {
                $modified_content | save --force $file
            }
            # Counted in dry-run too, so the summary shows what would change.
            $files_modified += 1

            if $report {
                print $"✏️  Modified: ($file)"
                if $cleanup_fixed > 0 {
                    print $"   Corrupted literals fixed: ($cleanup_fixed)"
                }
                if $newlines_fixed > 0 {
                    print $"   Literal newlines fixed: ($newlines_fixed)"
                }
                if $closing_fixed > 0 {
                    print $"   Closing fences fixed: ($closing_fixed)"
                }
                if $opening_fixed > 0 {
                    print $"   Opening fences fixed: ($opening_fixed)"
                }
            }
        }
    }

    print ""
    print "═════════════════════════════════════"
    print "Summary"
    print "═════════════════════════════════════"
    print $"Files scanned:            ($md_files | length)"
    print $"Files modified:           ($files_modified)"
    print $"Corrupted literals fixed: ($total_cleanup_fixed)"
    print $"Literal newlines fixed:   ($total_newlines_fixed)"
    print $"Closing fences fixed:     ($total_closing_fixed)"
    print $"Opening fences fixed:     ($total_opening_fixed)"
    print ""

    if $dry_run {
        print "⚠️  DRY-RUN MODE: No files were actually modified"
        print "Run without --dry-run to apply changes"
    } else if $files_modified == 0 {
        # Nothing was written, so do not claim that changes were applied.
        print "✅ No changes needed"
    } else {
        print "✅ Changes applied successfully"
        print "Run: git diff                                    # Review changes"
        print "Run: markdownlint-cli2 '**/*.md'                # Validate (MD040, MD060)"
    }
}

# Fix literal \n escape sequences → actual newlines
#
# Returns: {content: string, fixed_count: int}
#   content     - input with every backslash-n pair replaced by a real newline
#   fixed_count - number of pairs replaced (0 when the input contains none)
#
# NOTE(review): the replacement covers the whole document, fenced code blocks
# included, so an intentional `\n` inside a code sample is rewritten as well.
def fix-literal-newlines [content] {
    # Single quotes keep the backslash literal; splitting on it yields one more
    # piece than there are occurrences.
    let occurrences = ($content | split row '\n' | length) - 1
    {
        content: ($content | str replace -a '\n' "\n")
        fixed_count: $occurrences
    }
}

# Clean up corrupted {$detected_lang} literals only (preserve structure and blanks)
#
# Every fence written as ```{$detected_lang} is reduced to a bare ``` fence;
# no other text is touched.
#
# Returns: {content: string, fixed_count: int} where fixed_count is the number
# of corrupted fences replaced, matching the per-fence counts reported by the
# closing/opening phases.
def cleanup-corrupted-fences [content] {
    # Single quotes: the marker is matched as literal text, not interpolated.
    let marker = '```{$detected_lang}'
    let occurrences = ($content | split row $marker | length) - 1
    {
        content: ($content | str replace -a $marker '```')
        fixed_count: $occurrences
    }
}

# Discover all markdown files with proper exclusions
#
# Globs **/*.md from the current directory, strips the working-directory
# prefix so paths are relative, drops excluded locations, and returns the
# remaining paths sorted.
def discover-markdown-files [] {
    let cwd_prefix = $'(pwd)/'

    glob **/*.md
        | each { |path| $path | str replace $cwd_prefix '' }  # Normalize to relative paths
        | where { |path|
            # System/cache directories that must never be touched
            let in_ignored_dir = ($path =~ '(node_modules/|\.git/|\.vale/|\.coder/|\.claude/|\.wrks/|/old_config/)')
            # Root-level build/dist/target output (tools/build and tools/dist stay included)
            let in_root_artifacts = ($path =~ '^(build|dist|target)/' and $path !~ '^tools/(build|dist)')

            not ($in_ignored_dir or $in_root_artifacts)
        }
        | sort
}

# Fix malformed closing fences (remove language specifiers)
# Based on check-malformed-fences.nu logic
#
# Walks the document line by line. Fence state is entered only by an opening
# fence that carries a language (```lang); the next line starting with ```
# is then taken as its closing fence and, if it is exactly ```lang, is
# rewritten to a bare ```. Bare opening fences never enter fence state.
#
# Returns: {content: string, fixed_count: int}. The trailing newline of the
# input, if any, is preserved in the returned content.
def fix-closing-fences [content] {
    let lines = $content | lines
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for line in $lines {
        if ($line =~ '^```') {
            if (not $in_fence) {
                # Opening fence - only a fence with a language starts tracking
                if ($line =~ '^```\w+') {
                    $in_fence = true
                }
                $fixed_lines = ($fixed_lines | append $line)
            } else {
                # We're inside a fence - this is a closing fence
                # Check if it has language specifier (malformed)
                if ($line =~ '^```\w+\s*$') {
                    # Malformed: closing fence has language → fix it
                    $fixed_lines = ($fixed_lines | append '```')
                    $fixed_count += 1
                } else {
                    # Correct: closing fence without language
                    $fixed_lines = ($fixed_lines | append $line)
                }
                # Always reset fence state when we see any ``` while in fence
                $in_fence = false
            }
        } else {
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    let body = $fixed_lines | str join "\n"
    # `lines` discards the final line terminator; put it back so rewritten
    # files still end with a newline.
    let had_trailing_newline = $content | str ends-with "\n"

    {
        content: (if $had_trailing_newline { $body + "\n" } else { $body })
        fixed_count: $fixed_count
    }
}

# Fix opening fences by adding language specifiers
# Returns: {content: string, fixed_count: int}
#
# Every line starting with ``` toggles fence state. An opening fence that is
# bare (``` optionally followed by whitespace) is rewritten to ```<lang>,
# where <lang> comes from detect-language, which is given up to 10 lines
# after the fence, up to 3 lines before it, and the file path.
# Closing fences and opening fences that already carry text are kept as-is.
# The trailing newline of the input, if any, is preserved.
def fix-opening-fences [content, file_path] {
    let lines = $content | lines
    let total = $lines | length
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for entry in ($lines | enumerate) {
        let idx = $entry.index
        let line = $entry.item

        # Ordinary line: copy through
        if ($line !~ '^```') {
            $fixed_lines = ($fixed_lines | append $line)
            continue
        }

        # Already inside a fence → this is the closing fence
        if $in_fence {
            $fixed_lines = ($fixed_lines | append $line)
            $in_fence = false
            continue
        }

        # Opening fence. Trailing whitespace still counts as "no language".
        if ($line =~ '^```\s*$') {
            # Content after fence (at most 10 lines, never past the end)
            let next_start = $idx + 1
            let next_count = ([10, ($total - $next_start)] | math min)
            let content_after = if $next_count > 0 {
                $lines | skip $next_start | first $next_count
            } else {
                []
            }

            # Context before fence (at most 3 lines)
            let context_start = if ($idx > 3) { $idx - 3 } else { 0 }
            let context_before = $lines | skip $context_start | first ($idx - $context_start) | str join "\n"

            let detected_lang = detect-language $content_after $context_before $file_path

            $fixed_lines = ($fixed_lines | append $"```($detected_lang)")
            $fixed_count += 1
        } else {
            # Opening fence WITH language → no fix needed
            $fixed_lines = ($fixed_lines | append $line)
        }
        $in_fence = true
    }

    let body = $fixed_lines | str join "\n"
    # `lines` discards the final line terminator; put it back so rewritten
    # files still end with a newline.
    let had_trailing_newline = $content | str ends-with "\n"

    {
        content: (if $had_trailing_newline { $body + "\n" } else { $body })
        fixed_count: $fixed_count
    }
}

# Detect language based on content patterns, context, and file path
#
# Detectors are tried in priority order and the first non-null answer wins:
#   1. content patterns   2. context keywords   3. file path   4. commands
# When none of them answers, "bash" is returned as the fallback.
def detect-language [
    content_lines
    context_before
    file_path
] {
    # Closures keep evaluation lazy: a later detector runs only if every
    # earlier one returned null.
    let detectors = [
        {|| detect-by-content-pattern $content_lines }
        {|| detect-by-context-keywords $context_before }
        {|| detect-by-path $file_path }
        {|| detect-by-commands $content_lines }
    ]

    for detector in $detectors {
        let guess = do $detector
        if $guess != null { return $guess }
    }

    # Fallback (most common in technical docs)
    "bash"
}

# Detect language by analyzing first lines for specific patterns
#
# Input:   list of lines following an opening fence (may be empty).
# Returns: a language name, or null when nothing matches.
#
# Only the trimmed first line is inspected, except for the Nickel/JSON case
# which also looks at the first 5 lines. Keywords are matched as whole words
# so that e.g. `default:` or `letter` are not taken for `def` / `let`.
def detect-by-content-pattern [content] {
    if ($content | is-empty) { return null }

    let first_line = ($content | get 0 | str trim)

    # TOML: [section] pattern
    if ($first_line =~ '^\[.*\]$') { return "toml" }

    # YAML: starts with ---
    if ($first_line =~ '^---\s*$') { return "yaml" }

    # Nushell shebang (`nu` as a whole word, so e.g. gnuplot does not match)
    if ($first_line =~ '^#!.*\bnu\b') { return "nushell" }

    # Bash/sh shebang, direct (#!/bin/bash) or via env (#!/usr/bin/env bash)
    if ($first_line =~ '^#!\s*\S*/(env\s+)?(bash|sh)\b') { return "bash" }

    # Rust: fn, impl, struct, trait
    if ($first_line =~ '^(fn|impl|struct|trait|pub)\s+') { return "rust" }

    # Nickel or JSON: check for { with nickel-specific keywords
    if ($first_line =~ '^\{') {
        let nickel_keywords = $content
            | first 5
            | str join "\n"
            | str downcase

        if ($nickel_keywords =~ '(let\s+|import\s+|rec\s*\{|contract\s+)') {
            return "nickel"
        }
        return "json"
    }

    # Python def/class: the name must be followed by `(` or `:`; this keeps
    # Nushell's `def name [args] {` out.
    if ($first_line =~ '^(def|class)\s+\w+\s*(:|\(.*:)') { return "python" }

    # Python imports: `from pkg import x`, `import pkg`, `import pkg as p`
    if ($first_line =~ '^from\s+[\w.]+\s+import\b') { return "python" }
    if ($first_line =~ '^import\s+[\w.]+\s*($|#|as\b)') { return "python" }

    # JavaScript/TypeScript: function, const, var, async (whole words only)
    if ($first_line =~ '^(function|const|let|var|async|class|export)\b') { return "javascript" }

    null
}

# Detect language by keywords in the 3 lines before the fence
#
# Input:   the context text as a single string (case is ignored).
# Returns: a language name, or null when no keyword is present.
#
# Language names and file extensions are matched on word boundaries, so
# "trusted" is not read as "rust" and ".rst" is not read as ".rs".
# Note that `_` counts as a word character for these boundaries.
# Checks run top to bottom; the first match wins.
def detect-by-context-keywords [context] {
    let lower_context = $context | str downcase

    # Nickel context
    if ($lower_context =~ '(\bnickel\b|\.ncl\b)') { return "nickel" }

    # TOML context (also covers config.toml / *.toml)
    if ($lower_context =~ '\btoml\b') { return "toml" }

    # Nushell context
    if ($lower_context =~ '(\bnushell\b|\bnu script\b|\.nu\b|\bnu eval\b)') { return "nushell" }

    # Rust context
    if ($lower_context =~ '(\brust\b|\bcargo\b|\brustc\b|\.rs\b)') { return "rust" }

    # YAML context
    if ($lower_context =~ '(\byaml\b|\bkubernetes\b|\bk8s\b|\.yaml\b|\.yml\b)') { return "yaml" }

    # Configuration (deliberately broad: any mention maps to TOML)
    if ($lower_context =~ '(config|configuration|settings)') { return "toml" }

    null
}

# Detect language by file path patterns
#
# The path is lower-cased and tested against an ordered rule list; the
# language of the first matching rule is returned, or null if none match.
def detect-by-path [file_path] {
    let normalized = ($file_path | str downcase)

    # Order matters: earlier rules take precedence.
    let rules = [
        { pattern: '\.typedialog', lang: "toml" }      # .typedialog files are TOML-based
        { pattern: '(nickel|\.ncl)', lang: "nickel" }  # Nickel-specific paths
        { pattern: '(rust|\.rs)', lang: "rust" }       # Rust documentation
        { pattern: '(nushell|\.nu)', lang: "nushell" } # Nushell documentation
    ]

    for rule in $rules {
        if ($normalized =~ $rule.pattern) { return $rule.lang }
    }

    null
}

# Detect language by command patterns
#
# Looks only at the trimmed first line of the fenced content.
# Returns "bash" for a prompt marker ($ or #) or a known CLI name at the
# start, "nushell" for a line starting with `nu `, otherwise null.
# An empty list yields null.
def detect-by-commands [content] {
    if ($content | is-empty) { return null }

    let head = ($content | first | str trim)

    let has_prompt = ($head =~ '^(\$|\#)')
    let has_known_cli = ($head =~ '^(cargo|docker|kubectl|git|make|npm|yarn|pnpm|provisioning)')

    if ($has_prompt or $has_known_cli) {
        "bash"
    } else if ($head =~ '^nu ') {
        "nushell"
    } else {
        null
    }
}