#!/usr/bin/env nu
# Fix Markdown Issues: Newlines + Closing Code Fence Violations
# Handles:
#   1. Literal \n escape sequences → actual newlines
#   2. Closing fences with language specifiers (malformed)
#
# Usage:
#   nu fix-markdown-fences.nu                      # Run every phase (default)
#   nu fix-markdown-fences.nu --dry-run            # Preview without changes
#   nu fix-markdown-fences.nu --phase newlines     # Only fix literal \n
#   nu fix-markdown-fences.nu --phase closing      # Only fix closing fences
#   nu fix-markdown-fences.nu --dry-run --report   # Show detailed report
#
# For opening fences (manual):
#   find . -name '*.md' -exec sed -i '' 's/^```$/```text/g; s/^```[a-z]*$/```/g' {} \;
#
# Phases:
#   cleanup  - Replace corrupted ```{$detected_lang} fences with plain ```
#   newlines - Fix literal \n escape sequences → actual newlines
#   closing  - Fix malformed closing fences (remove language specifiers)
#   opening  - Add a detected language to opening fences that have none
#   all      - Run every phase in the order above (default)

# Entry point: scan every discovered markdown file, run the selected fix
# phases in a fixed order (cleanup → newlines → closing → opening) and print
# a summary of how many fixes each phase reported.
#
# A file is written back only when at least one phase reports a fix and
# --dry-run is not set. Exits with status 1 when --phase is not recognised.
def main [
    --dry-run                # Preview without making changes
    --phase: string = "all"  # Phase: cleanup|newlines|closing|opening|all
    --report                 # Show detailed before/after report
] {
    # Validate phase argument
    if $phase !~ '^(cleanup|newlines|closing|opening|all)$' {
        # Interpolation needs parentheses; a bare $phase would be printed literally.
        print $"Error: Invalid phase '($phase)'. Must be: cleanup, newlines, closing, opening, or all"
        exit 1
    }

    print "🔍 Markdown Code Fence Violation Fixer"
    print ""
    let mode = if $dry_run { 'DRY-RUN (no changes)' } else { 'REAL MODE' }
    print $"Mode: ($mode)"
    print $"Phase: ($phase | str capitalize)"
    print ""

    # Discover all markdown files
    let md_files = discover-markdown-files
    print $"📄 Found ($md_files | length) markdown files"
    print ""

    # Track statistics across all files
    mut total_cleanup_fixed = 0
    mut total_newlines_fixed = 0
    mut total_closing_fixed = 0
    mut total_opening_fixed = 0
    mut files_modified = 0

    # Process files
    for file in $md_files {
        # --raw: always read the file as plain text, whatever converters are installed.
        let original_content = open --raw $file
        mut modified_content = $original_content
        mut cleanup_fixed = 0
        mut newlines_fixed = 0
        mut closing_fixed = 0
        mut opening_fixed = 0

        # Phase -1: Clean up corrupted {$detected_lang} literals (CRITICAL)
        if ($phase == "cleanup" or $phase == "all") {
            let cleanup_result = cleanup-corrupted-fences $modified_content
            $cleanup_fixed = $cleanup_result.fixed_count
            $total_cleanup_fixed += $cleanup_fixed
            $modified_content = $cleanup_result.content
        }

        # Phase 0: Fix literal \n escape sequences.
        # The helper is only called when the two-character sequence is present.
        if ($phase == "newlines" or $phase == "all") {
            if ($modified_content | str contains '\n') {
                let newlines_result = fix-literal-newlines $modified_content
                $newlines_fixed = $newlines_result.fixed_count
                $total_newlines_fixed += $newlines_fixed
                $modified_content = $newlines_result.content
            }
        }

        # Phase 1: Fix closing fences
        if ($phase == "closing" or $phase == "all") {
            let closing_result = fix-closing-fences $modified_content
            $closing_fixed = $closing_result.fixed_count
            $total_closing_fixed += $closing_fixed
            $modified_content = $closing_result.content
        }

        # Phase 2: Fix opening fences with language detection
        if ($phase == "opening" or $phase == "all") {
            let opening_result = fix-opening-fences $modified_content $file
            $opening_fixed = $opening_result.fixed_count
            $total_opening_fixed += $opening_fixed
            $modified_content = $opening_result.content
        }

        # Write changes if not dry-run AND if there were any fixes
        let has_changes = ($cleanup_fixed > 0) or ($newlines_fixed > 0) or ($closing_fixed > 0) or ($opening_fixed > 0)
        if $has_changes {
            if (not $dry_run) {
                $modified_content | save --force $file
            }
            # Counted in dry-run mode too, so the summary shows what would change.
            $files_modified += 1

            if $report {
                print $"✏️ Modified: ($file)"
                if $cleanup_fixed > 0 {
                    print $" Corrupted literals fixed: ($cleanup_fixed)"
                }
                if $newlines_fixed > 0 {
                    print $" Literal newlines fixed: ($newlines_fixed)"
                }
                if $closing_fixed > 0 {
                    print $" Closing fences fixed: ($closing_fixed)"
                }
                if $opening_fixed > 0 {
                    print $" Opening fences fixed: ($opening_fixed)"
                }
            }
        }
    }

    print ""
    print "═════════════════════════════════════"
    print "Summary"
    print "═════════════════════════════════════"
    print $"Files scanned: ($md_files | length)"
    print $"Files with fixes: ($files_modified)"
    print $"Corrupted literals fixed: ($total_cleanup_fixed)"
    print $"Literal newlines fixed: ($total_newlines_fixed)"
    print $"Closing fences fixed: ($total_closing_fixed)"
    print $"Opening fences fixed: ($total_opening_fixed)"
    print ""

    if $dry_run {
        print "⚠️ DRY-RUN MODE: No files were actually modified"
        print "Run without --dry-run to apply changes"
    } else {
        print "✅ Changes applied successfully"
        print "Run: git diff # Review changes"
        print "Run: markdownlint-cli2 '**/*.md' # Validate (MD040, MD060)"
    }
}

# Fix literal \n escape sequences → actual newlines
#
# Replaces every two-character sequence backslash + "n" in `content` with a
# real newline character.
# NOTE: the replacement is applied to the whole text, including anything
# inside fenced code blocks.
#
# Returns: {content: string, fixed_count: int}
#   fixed_count is the number of sequences replaced (0 when none are present).
def fix-literal-newlines [content] {
    # Splitting on the literal sequence yields one more piece than there are
    # occurrences, so the piece count minus one is the number of replacements.
    let occurrences = ($content | split row '\n' | length) - 1

    {
        content: ($content | str replace --all '\n' "\n")
        fixed_count: $occurrences
    }
}

# Clean up corrupted {$detected_lang} literals only (preserve structure and blanks)
#
# Every occurrence of the literal text ```{$detected_lang} is replaced with a
# plain ``` fence; nothing else in `content` is touched.
#
# Returns: {content: string, fixed_count: int}
#   fixed_count is the number of corrupted fences replaced.
def cleanup-corrupted-fences [content] {
    # Single-quoted so that $detected_lang stays literal text.
    let marker = '```{$detected_lang}'

    # Piece count minus one = number of times the marker occurs.
    let occurrences = ($content | split row $marker | length) - 1

    {
        content: ($content | str replace --all $marker '```')
        fixed_count: $occurrences
    }
}

# Discover all markdown files with proper exclusions
#
# Globs **/*.md from the current directory, strips the current-directory
# prefix from each match, drops paths in excluded locations and returns the
# remaining paths sorted.
def discover-markdown-files [] {
    # Resolved once instead of once per file.
    let cwd_prefix = $'(pwd)/'

    glob **/*.md
    | each { |f| $f | str replace $cwd_prefix '' }  # Normalize to relative paths
    | where { |f|
        # Exclude system/cache directories
        let excluded = $f =~ '(node_modules/|\.git/|\.vale/|\.coder/|\.claude/|\.wrks/|/old_config/)'

        # Exclude root-level build/dist/target only. The pattern is anchored at
        # the start of the path, so tools/build and tools/dist never match it
        # and need no separate exception.
        let bad_build = $f =~ '^(build|dist|target)/'

        not $excluded and not $bad_build
    }
    | sort
}

# Fix malformed closing fences (remove language specifiers)
# Based on check-malformed-fences.nu logic
#
# Walks `content` line by line. Fence state is entered only by an opening
# fence that carries a language (```lang); a bare ``` seen outside a fence is
# passed through without changing state. While inside a fence, the next line
# starting with ``` is treated as the closing fence: if it consists of ```
# followed by word characters, it is rewritten to a plain ``` and counted.
#
# Returns: {content: string, fixed_count: int}
#   content keeps the trailing newline of the input when it had one.
def fix-closing-fences [content] {
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for line in ($content | lines) {
        # Anything that is not a fence line is copied unchanged.
        if ($line !~ '^```') {
            $fixed_lines = ($fixed_lines | append $line)
            continue
        }

        if $in_fence {
            # We're inside a fence - this is a closing fence
            if ($line =~ '^```\w+\s*$') {
                # Malformed: closing fence has language → fix it
                $fixed_lines = ($fixed_lines | append '```')
                $fixed_count += 1
            } else {
                # Correct: closing fence without language
                $fixed_lines = ($fixed_lines | append $line)
            }
            # Always reset fence state when we see any ``` while in fence
            $in_fence = false
        } else {
            # Opening fence - only one with a language enters fence state
            if ($line =~ '^```\w+') {
                $in_fence = true
            }
            $fixed_lines = ($fixed_lines | append $line)
        }
    }

    # `lines` drops the final line terminator; put it back so a rewritten file
    # does not lose its end-of-file newline.
    let body = ($fixed_lines | str join "\n")
    let had_trailing_newline = ($content | str ends-with "\n")

    {
        content: (if $had_trailing_newline { $"($body)\n" } else { $body })
        fixed_count: $fixed_count
    }
}

# Fix opening fences by adding language specifiers
#
# Walks `content` line by line, toggling fence state on every line that
# starts with ```. An opening fence that is exactly ``` gets a language
# appended, chosen by detect-language from up to 10 lines after the fence,
# up to 3 lines before it, and `file_path`. Closing fences and opening
# fences that already have text after ``` are copied unchanged.
#
# Returns: {content: string, fixed_count: int}
#   content keeps the trailing newline of the input when it had one.
def fix-opening-fences [content, file_path] {
    let lines = ($content | lines)
    mut fixed_lines = []
    mut in_fence = false
    mut fixed_count = 0

    for entry in ($lines | enumerate) {
        let idx = $entry.index
        let line = $entry.item

        # Non-fence lines are copied unchanged.
        if ($line !~ '^```') {
            $fixed_lines = ($fixed_lines | append $line)
            continue
        }

        if $in_fence {
            # We're inside a fence → this is the closing fence
            $fixed_lines = ($fixed_lines | append $line)
            $in_fence = false
            continue
        }

        # This is an opening fence
        if ($line =~ '^```$') {
            # Opening fence WITHOUT language → needs fixing.
            # Content after the fence: at most 10 lines (fewer near end of file).
            let content_after = ($lines | skip ($idx + 1) | take 10)

            # Context before the fence: at most 3 lines (fewer near start of file).
            let context_start = if ($idx > 3) { $idx - 3 } else { 0 }
            let context_before = ($lines | skip $context_start | take ($idx - $context_start) | str join "\n")

            # Detect language
            let detected_lang = detect-language $content_after $context_before $file_path

            # Parentheses are what make the language interpolate into the fence.
            $fixed_lines = ($fixed_lines | append $"```($detected_lang)")
            $fixed_count += 1
        } else {
            # Opening fence WITH language → no fix needed
            $fixed_lines = ($fixed_lines | append $line)
        }

        # Enter fence state
        $in_fence = true
    }

    # `lines` drops the final line terminator; put it back so a rewritten file
    # does not lose its end-of-file newline.
    let body = ($fixed_lines | str join "\n")
    let had_trailing_newline = ($content | str ends-with "\n")

    {
        content: (if $had_trailing_newline { $"($body)\n" } else { $body })
        fixed_count: $fixed_count
    }
}

# Detect language based on content patterns, context, and file path
#
# Runs the detectors below in priority order and returns the first non-null
# answer; when none of them recognises anything the result is "bash".
def detect-language [
    content_lines
    context_before
    file_path
] {
    # Each detector is wrapped in a closure so it only runs if every
    # higher-priority detector returned null.
    let detectors = [
        # Priority 1: Content patterns (highest confidence)
        {|| detect-by-content-pattern $content_lines }
        # Priority 2: Context keywords (3 lines before fence)
        {|| detect-by-context-keywords $context_before }
        # Priority 3: File path hints
        {|| detect-by-path $file_path }
        # Priority 4: Command detection
        {|| detect-by-commands $content_lines }
    ]

    for detector in $detectors {
        let guess = (do $detector)
        if $guess != null { return $guess }
    }

    # Priority 5: Fallback (most common in technical docs)
    "bash"
}

# Detect language by analyzing first lines for specific patterns
#
# `content` is a list of lines. Only the trimmed first line is matched, except
# for the `{` case, which also inspects the first five lines for Nickel
# keywords. Returns a language name, or null when nothing matches or the list
# is empty.
def detect-by-content-pattern [content] {
    if ($content | is-empty) { return null }

    let head = ($content | get 0 | str trim)

    # Checked in order; the first matching pattern wins.
    let leading_rules = [
        [pattern lang];
        ['^\[.*\]$' "toml"]                              # TOML: [section] pattern
        ['^---\s*$' "yaml"]                              # YAML: starts with ---
        ['^#!.*nu' "nushell"]                            # Nushell shebang
        ['^#!/bin/(bash|sh)' "bash"]                     # Bash shebang
        ['^(fn|impl|struct|trait|pub)\s+' "rust"]        # Rust: fn, impl, struct, trait
    ]
    for rule in $leading_rules {
        if ($head =~ $rule.pattern) { return $rule.lang }
    }

    # Nickel or JSON: a leading { is Nickel when the first five lines
    # (lower-cased) contain a Nickel-specific keyword, JSON otherwise.
    if ($head =~ '^\{') {
        let preview = ($content | first 5 | str join "\n" | str downcase)
        let looks_like_nickel = ($preview =~ '(let\s+|import\s+|rec\s*\{|contract\s+)')
        return (if $looks_like_nickel { "nickel" } else { "json" })
    }

    let trailing_rules = [
        [pattern lang];
        ['^(def|class|import|from).*:' "python"]                                # Python: def, class, import
        ['^(function|const|let|var|async|class|export)' "javascript"]          # JavaScript/TypeScript
    ]
    for rule in $trailing_rules {
        if ($head =~ $rule.pattern) { return $rule.lang }
    }

    null
}

# Detect language by keywords in the 3 lines before the fence
#
# `context` is a single string; it is lower-cased and tested against each
# keyword pattern in order. Returns the language of the first pattern that
# matches, or null when none does.
def detect-by-context-keywords [context] {
    let haystack = ($context | str downcase)

    # Order matters: earlier rows take precedence over later ones.
    let keyword_rules = [
        [pattern lang];
        ['(nickel|\.ncl|nickel eval)' "nickel"]              # Nickel context
        ['(toml|config\.toml|\.toml)' "toml"]                # TOML context
        ['(nushell|nu script|\.nu|nu eval)' "nushell"]       # Nushell context
        ['(rust|cargo|rustc|\.rs)' "rust"]                   # Rust context
        ['(yaml|kubernetes|k8s|\.yaml|\.yml)' "yaml"]        # YAML context
        ['(config|configuration|settings)' "toml"]           # Configuration
    ]

    for rule in $keyword_rules {
        if ($haystack =~ $rule.pattern) { return $rule.lang }
    }

    null
}

# Detect language by file path patterns
#
# `file_path` is lower-cased and tested against each path pattern in order.
# Returns the language of the first pattern that matches, or null.
def detect-by-path [file_path] {
    let normalized = ($file_path | str downcase)

    # Order matters: earlier rows take precedence over later ones.
    let path_rules = [
        [pattern lang];
        ['\.typedialog' "toml"]           # .typedialog files are TOML-based
        ['(nickel|\.ncl)' "nickel"]       # Nickel-specific paths
        ['(rust|\.rs)' "rust"]            # Rust documentation
        ['(nushell|\.nu)' "nushell"]      # Nushell documentation
    ]

    for rule in $path_rules {
        if ($normalized =~ $rule.pattern) { return $rule.lang }
    }

    null
}

# Detect language by command patterns
#
# `content` is a list of lines; only the trimmed first line is examined.
# Returns "bash", "nushell", or null when the line is not recognised or the
# list is empty.
def detect-by-commands [content] {
    if ($content | is-empty) { return null }

    let head = ($content | get 0 | str trim)

    if ($head =~ '^(\$|\#)') {
        # Command prompt indicators
        "bash"
    } else if ($head =~ '^(cargo|docker|kubectl|git|make|npm|yarn|pnpm|provisioning)') {
        # Common command prefixes
        "bash"
    } else if ($head =~ '^nu ') {
        # Nushell-specific
        "nushell"
    } else {
        null
    }
}