#!/usr/bin/env nu

# Fix Markdown Issues: Newlines + Code Fence Violations
#
# Handles:
#   1. Literal \n escape sequences → actual newlines
#   2. Closing fences with language specifiers (malformed)
#   3. Corrupted `{$detected_lang}` literals left on opening fences
#   4. Opening fences without a language specifier (language is guessed heuristically)
#
# Usage:
#   nu fix-markdown-fences.nu                      # Fix all phases
#   nu fix-markdown-fences.nu --dry-run            # Preview without changes
#   nu fix-markdown-fences.nu --phase cleanup      # Only remove corrupted {$detected_lang} literals
#   nu fix-markdown-fences.nu --phase newlines     # Only fix literal \n
#   nu fix-markdown-fences.nu --phase closing      # Only fix closing fences
#   nu fix-markdown-fences.nu --phase opening      # Only add languages to bare opening fences
#   nu fix-markdown-fences.nu --dry-run --report   # Show detailed report
#
# For opening fences (manual alternative to --phase opening):
#   find . -name '*.md' -exec sed -i '' 's/^```$/```text/g; s/^```[a-z]*$/```/g' {} \;
#
# Phases:
#   cleanup  - Replace corrupted ```{$detected_lang} fences with a bare ```
#   newlines - Fix literal \n escape sequences → actual newlines
#   closing  - Fix malformed closing fences (remove language specifiers)
#   opening  - Add a detected language to opening fences that have none
#   all      - Run every phase in the order above (default)

# Entry point: discovers markdown files under the current directory, runs the
# selected phase(s) on each file's content, writes the file back when at least
# one fix was counted (unless --dry-run), and prints a summary.
def main [
    --dry-run                 # Preview without making changes
    --phase: string = "all"   # Phase: cleanup|newlines|closing|opening|all
    --report                  # Show detailed before/after report
] {
    # Validate phase argument
    if $phase !~ '^(cleanup|newlines|closing|opening|all)$' {
        # Interpolation in $"..." needs parentheses; a bare $phase prints literally.
        print $"Error: Invalid phase '($phase)'. Must be: cleanup, newlines, closing, opening, or all"
        exit 1
    }

    print "🔍 Markdown Code Fence Violation Fixer"
    print ""
    let mode = if $dry_run { 'DRY-RUN (no changes)' } else { 'REAL MODE' }
    print $"Mode: ($mode)"
    print $"Phase: ($phase | str capitalize)"
    print ""

    # Discover all markdown files
    let md_files = discover-markdown-files
    print $"📄 Found ($md_files | length) markdown files"
    print ""

    # Track statistics
    mut total_cleanup_fixed = 0
    mut total_newlines_fixed = 0
    mut total_closing_fixed = 0
    mut total_opening_fixed = 0
    mut files_modified = 0

    # Process files
    for file in $md_files {
        # --raw: always treat the file as plain text, never as parsed data.
        mut modified_content = (open --raw $file)
        mut cleanup_fixed = 0
        mut newlines_fixed = 0
        mut closing_fixed = 0
        mut opening_fixed = 0

        # Phase -1: Clean up corrupted {$detected_lang} literals (CRITICAL)
        if ($phase == "cleanup" or $phase == "all") {
            let cleanup_result = cleanup-corrupted-fences $modified_content
            $cleanup_fixed = $cleanup_result.fixed_count
            $total_cleanup_fixed += $cleanup_fixed
            $modified_content = $cleanup_result.content
        }

        # Phase 0: Fix literal \n escape sequences
        # (fix-literal-newlines reports 0 when nothing matches, so no pre-check is needed)
        if ($phase == "newlines" or $phase == "all") {
            let newlines_result = fix-literal-newlines $modified_content
            $newlines_fixed = $newlines_result.fixed_count
            $total_newlines_fixed += $newlines_fixed
            $modified_content = $newlines_result.content
        }

        # Phase 1: Fix closing fences
        if ($phase == "closing" or $phase == "all") {
            let closing_result = fix-closing-fences $modified_content
            $closing_fixed = $closing_result.fixed_count
            $total_closing_fixed += $closing_fixed
            $modified_content = $closing_result.content
        }

        # Phase 2: Fix opening fences with language detection
        if ($phase == "opening" or $phase == "all") {
            let opening_result = fix-opening-fences $modified_content $file
            $opening_fixed = $opening_result.fixed_count
            $total_opening_fixed += $opening_fixed
            $modified_content = $opening_result.content
        }

        # Write changes if not dry-run AND if there were any fixes
        let has_changes = ($cleanup_fixed > 0) or ($newlines_fixed > 0) or ($closing_fixed > 0) or ($opening_fixed > 0)

        if $has_changes {
            if (not $dry_run) {
                $modified_content | save --force $file
            }

            $files_modified += 1

            if $report {
                print $"✏️ Modified: ($file)"
                if $cleanup_fixed > 0 {
                    print $" Corrupted literals fixed: ($cleanup_fixed)"
                }
                if $newlines_fixed > 0 {
                    print $" Literal newlines fixed: ($newlines_fixed)"
                }
                if $closing_fixed > 0 {
                    print $" Closing fences fixed: ($closing_fixed)"
                }
                if $opening_fixed > 0 {
                    print $" Opening fences fixed: ($opening_fixed)"
                }
            }
        }
    }

    print ""
    print "═════════════════════════════════════"
    print "Summary"
    print "═════════════════════════════════════"
    print $"Files scanned: ($md_files | length)"
    # In dry-run mode this is the number of files that WOULD be rewritten.
    print $"Files with fixes: ($files_modified)"
    print $"Corrupted literals fixed: ($total_cleanup_fixed)"
    print $"Literal newlines fixed: ($total_newlines_fixed)"
    print $"Closing fences fixed: ($total_closing_fixed)"
    print $"Opening fences fixed: ($total_opening_fixed)"
    print ""

    if $dry_run {
        print "⚠️ DRY-RUN MODE: No files were actually modified"
        print "Run without --dry-run to apply changes"
    } else {
        print "✅ Changes applied successfully"
        print "Run: git diff # Review changes"
        print "Run: markdownlint-cli2 '**/*.md' # Validate (MD040, MD060)"
    }
}

# Join processed lines back into one string, keeping the original's trailing
# newline. `lines` drops the final line terminator, so a plain `str join "\n"`
# would strip the end-of-file newline from every rewritten file.
def join-preserving-eof [fixed_lines, original] {
    let joined = $fixed_lines | str join "\n"
    if ($original | str ends-with "\n") {
        $"($joined)\n"
    } else {
        $joined
    }
}

# Fix literal \n escape sequences → actual newlines
# Returns: {content: string, fixed_count: int} where fixed_count is the number
# of occurrences replaced (0 when the pattern is absent).
def fix-literal-newlines [content] {
    # NOTE(review): single-quoted Nushell strings are raw, so this pattern is the
    # three characters backslash-backslash-n. Confirm that is what the corrupted
    # files contain (as opposed to a single backslash followed by n).
    let pattern = '\\n'
    {
        content: ($content | str replace -a $pattern "\n")
        fixed_count: (($content | split row $pattern | length) - 1)
    }
}

# Clean up corrupted {$detected_lang} literals only (preserve structure and blanks)
# Returns: {content: string, fixed_count: int} where fixed_count is the number
# of corrupted fences replaced.
def cleanup-corrupted-fences [content] {
    # Only fix corrupted opening fences with {$detected_lang}
    # Replace them with clean ``` (without language tag so they can be detected later)
    let corrupted = '```{$detected_lang}'
    {
        content: ($content | str replace -a $corrupted '```')
        fixed_count: (($content | split row $corrupted | length) - 1)
    }
}

# Discover all markdown files with proper exclusions
# Returns a sorted list of paths relative to the current directory.
def discover-markdown-files [] {
    (
        glob **/*.md
        | each { |f| $f | str replace $'(pwd)/' '' }  # Normalize to relative paths
        | where { |f|
            # Exclude system/cache directories
            let excluded = ($f =~ '(node_modules/|\.git/|\.vale/|\.coder/|\.claude/|\.wrks/|/old_config/)')
            # Exclude root-level build/dist/target (but NOT tools/build, tools/dist)
            let bad_build = ($f =~ '^(build|dist|target)/' and $f !~ '^tools/(build|dist)')
            (not $excluded) and (not $bad_build)
        }
        | sort
    )
}

# Fix malformed closing fences (remove language specifiers)
# Based on check-malformed-fences.nu logic
# Returns: {content: string, fixed_count: int}
def fix-closing-fences [content] {
    let lines = $content | lines
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for line in $lines {
        if ($line =~ '^```') {
            if (not $in_fence) {
                # Opening fence - fence state is only entered when it carries a
                # language; a bare ``` leaves the state unchanged.
                if ($line =~ '^```\w+') {
                    $in_fence = true
                }
                $fixed_lines = ($fixed_lines | append $line)
            } else {
                # We're inside a fence - this is a closing fence
                # Check if it has language specifier (malformed)
                if ($line =~ '^```\w+\s*$') {
                    # Malformed: closing fence has language → fix it
                    $fixed_lines = ($fixed_lines | append '```')
                    $fixed_count += 1
                } else {
                    # Correct: closing fence without language
                    $fixed_lines = ($fixed_lines | append $line)
                }
                # Always reset fence state when we see any ``` while in fence
                $in_fence = false
            }
        } else {
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    {
        content: (join-preserving-eof $fixed_lines $content)
        fixed_count: $fixed_count
    }
}

# Fix opening fences by adding language specifiers
# Returns: {content: string, fixed_count: int}
def fix-opening-fences [content, file_path] {
    let lines = $content | lines
    let total = $lines | length
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for entry in ($lines | enumerate) {
        let idx = $entry.index
        let line = $entry.item

        if ($line =~ '^```') {
            if (not $in_fence) {
                # This is an opening fence
                if ($line =~ '^```$') {
                    # Opening fence WITHOUT language → needs fixing
                    # Get content after fence (first 10 lines)
                    let next_start = $idx + 1
                    let next_count = if ($next_start + 10 < $total) {
                        10
                    } else {
                        $total - $next_start
                    }
                    let content_after = if $next_start < $total {
                        $lines | skip $next_start | first $next_count
                    } else {
                        []
                    }

                    # Get context before fence (3 lines); a fence on the very
                    # first line has no context, so avoid asking for `first 0`.
                    let context_start = if ($idx > 3) { $idx - 3 } else { 0 }
                    let context_before = if $idx > 0 {
                        $lines | skip $context_start | first ($idx - $context_start) | str join "\n"
                    } else {
                        ""
                    }

                    # Detect language
                    let detected_lang = detect-language $content_after $context_before $file_path

                    # Add detected language to fence (double quotes for interpolation)
                    $fixed_lines = ($fixed_lines | append $"```($detected_lang)")
                    $fixed_count += 1
                } else {
                    # Opening fence WITH language → no fix needed
                    $fixed_lines = ($fixed_lines | append $line)
                }
                # Enter fence state
                $in_fence = true
            } else {
                # We're inside a fence → this is closing fence
                $fixed_lines = ($fixed_lines | append $line)
                $in_fence = false
            }
        } else {
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    {
        content: (join-preserving-eof $fixed_lines $content)
        fixed_count: $fixed_count
    }
}

# Detect language based on content patterns, context, and file path
# Tries each detector in priority order and returns the first non-null answer;
# falls back to "bash" when none of them matches.
def detect-language [
    content_lines
    context_before
    file_path
] {
    # Priority 1: Content patterns (highest confidence)
    let lang_by_content = detect-by-content-pattern $content_lines
    if $lang_by_content != null {
        return $lang_by_content
    }

    # Priority 2: Context keywords (3 lines before fence)
    let lang_by_context = detect-by-context-keywords $context_before
    if $lang_by_context != null {
        return $lang_by_context
    }

    # Priority 3: File path hints
    let lang_by_path = detect-by-path $file_path
    if $lang_by_path != null {
        return $lang_by_path
    }

    # Priority 4: Command detection
    let lang_by_command = detect-by-commands $content_lines
    if $lang_by_command != null {
        return $lang_by_command
    }

    # Priority 5: Fallback (most common in technical docs)
    "bash"
}

# Detect language by analyzing first lines for specific patterns
# `content` is the list of lines following the fence; returns a language name
# or null when no pattern matches (or the list is empty).
def detect-by-content-pattern [content] {
    if ($content | is-empty) {
        return null
    }

    let first_line = ($content | get 0 | str trim)

    # TOML: [section] pattern
    if ($first_line =~ '^\[.*\]$') {
        return "toml"
    }

    # YAML: starts with ---
    if ($first_line =~ '^---\s*$') {
        return "yaml"
    }

    # Nushell shebang
    if ($first_line =~ '^#!.*nu') {
        return "nushell"
    }

    # Bash shebang
    if ($first_line =~ '^#!/bin/(bash|sh)') {
        return "bash"
    }

    # Rust: fn, impl, struct, trait
    if ($first_line =~ '^(fn|impl|struct|trait|pub)\s+') {
        return "rust"
    }

    # Nickel or JSON: check for { with nickel-specific keywords
    if ($first_line =~ '^\{') {
        let nickel_keywords = $content | first 5 | str join "\n" | str downcase
        if ($nickel_keywords =~ '(let\s+|import\s+|rec\s*\{|contract\s+)') {
            return "nickel"
        }
        return "json"
    }

    # Python: def, class, import (the pattern also requires a ':' on the line)
    if ($first_line =~ '^(def|class|import|from).*:') {
        return "python"
    }

    # JavaScript/TypeScript: function, const, var, async
    if ($first_line =~ '^(function|const|let|var|async|class|export)') {
        return "javascript"
    }

    null
}

# Detect language by keywords in the 3 lines before the fence
# Matching is case-insensitive; returns a language name or null.
def detect-by-context-keywords [context] {
    let lower_context = $context | str downcase

    # Nickel context
    if ($lower_context =~ '(nickel|\.ncl|nickel eval)') {
        return "nickel"
    }

    # TOML context
    if ($lower_context =~ '(toml|config\.toml|\.toml)') {
        return "toml"
    }

    # Nushell context
    if ($lower_context =~ '(nushell|nu script|\.nu|nu eval)') {
        return "nushell"
    }

    # Rust context
    if ($lower_context =~ '(rust|cargo|rustc|\.rs)') {
        return "rust"
    }

    # YAML context
    if ($lower_context =~ '(yaml|kubernetes|k8s|\.yaml|\.yml)') {
        return "yaml"
    }

    # Configuration
    if ($lower_context =~ '(config|configuration|settings)') {
        return "toml"
    }

    null
}

# Detect language by file path patterns
# Matching is case-insensitive; returns a language name or null.
def detect-by-path [file_path] {
    let lower_path = $file_path | str downcase

    # .typedialog files are TOML-based
    if ($lower_path =~ '\.typedialog') {
        return "toml"
    }

    # Nickel-specific paths
    if ($lower_path =~ '(nickel|\.ncl)') {
        return "nickel"
    }

    # Rust documentation
    if ($lower_path =~ '(rust|\.rs)') {
        return "rust"
    }

    # Nushell documentation
    if ($lower_path =~ '(nushell|\.nu)') {
        return "nushell"
    }

    null
}

# Detect language by command patterns
# Looks only at the first line after the fence; returns a language name or null.
def detect-by-commands [content] {
    if ($content | is-empty) {
        return null
    }

    let first_line = ($content | get 0 | str trim)

    # Command prompt indicators
    if ($first_line =~ '^(\$|\#)') {
        return "bash"
    }

    # Common command prefixes
    if ($first_line =~ '^(cargo|docker|kubectl|git|make|npm|yarn|pnpm|provisioning)') {
        return "bash"
    }

    # Nushell-specific
    if ($first_line =~ '^nu ') {
        return "nushell"
    }

    null
}