provisioning/scripts/fix-remaining-docs.py
2026-01-14 02:59:52 +00:00

149 lines
5.4 KiB
Python

#!/usr/bin/env python3
"""Fix remaining markdown errors WITHOUT creating malformed closing fences.
CRITICAL: Opening fences get language, closing fences NEVER get language.
Uses stateful tracking to know whether each line is inside or outside a fence.
"""
import re
import sys
from pathlib import Path
# A fence line: optional indentation, a run of 3+ backticks or tildes, then
# whatever follows the run (the info string on an opening fence).
_FENCE_RE = re.compile(r'^(?P<indent>[ \t]*)(?P<marker>`{3,}|~{3,})(?P<rest>.*)$')

# ATX heading; headings are never wrapped because a heading cannot span lines.
_HEADING_RE = re.compile(r'^ {0,3}#{1,6}(?:\s|$)')

# Cell separator in a table row: a pipe that is not backslash-escaped.
_TABLE_PIPE_RE = re.compile(r'(?<!\\)\|')

# Words that would start a new block (list item, heading, quote, table row,
# thematic break / setext underline, fence) if they began a line.
_BLOCK_START_WORD_RE = re.compile(
    r'^(?:[-+*=_]+|#{1,6}|\d{1,9}[.)]|[>|].*|`{3,}.*|~{3,}.*)$'
)

# Inline tokenizer. Alternatives are tried in order at each position, so the
# "protected" spans (reference definitions, code spans, bracketed link text
# with its destination, dangling link destinations, <...> autolinks / HTML)
# win over the bare-URL and bare-email alternatives and are left untouched.
_INLINE_TOKEN_RE = re.compile(
    r'(?P<refdef>^[ \t]*\[[^\]]+\]:[ \t]*\S+)'
    r'|(?P<code>(?P<ticks>`+).*?(?P=ticks))'
    r'|(?P<link>!?\[[^\]]*\](?:\([^)]*\)|\[[^\]]*\])?)'
    r'|(?P<dest>\]\([^)]*\)?)'
    r'|(?P<html><[^>]*>)'
    r'|(?P<url>https?://[^\s<>\[\]`]+)'
    r'|(?P<email>[\w.+-]+@[\w-]+(?:\.[\w-]+)+)'
)

# Characters that end a sentence or emphasis rather than a URL.
_URL_TRAILING_PUNCTUATION = '.,;:!?\'"*_~'


def _split_trailing_punctuation(url):
    """Split *url* into (url proper, trailing punctuation to keep outside the link)."""
    end = len(url)
    while end:
        last = url[end - 1]
        if last in _URL_TRAILING_PUNCTUATION:
            end -= 1
        elif last == ')' and url.count(')', 0, end) > url.count('(', 0, end):
            # An unbalanced ')' closes a surrounding parenthesis, not the URL.
            end -= 1
        else:
            break
    return url[:end], url[end:]


def _fix_inline_token(match):
    """Replacement callback: wrap bare URLs / emails, keep protected spans as-is."""
    url = match.group('url')
    if url is not None:
        core, tail = _split_trailing_punctuation(url)
        if core.endswith('://'):
            # Nothing but the scheme is left; not a usable link target.
            return url
        return f'[{core}]({core}){tail}'
    email = match.group('email')
    if email is not None:
        return f'`{email}`'
    return match.group(0)


def _normalize_table_row(line):
    """Return a table row (a line starting with '|') with one space around each cell."""
    indent = line[:len(line) - len(line.lstrip(' \t'))]
    # The row starts with '|', so the first split piece is always empty.
    cells = _TABLE_PIPE_RE.split(line.strip())[1:]
    # A trailing '|' yields one more empty piece; it is a border, not a cell.
    if cells and not cells[-1].strip():
        cells.pop()
    if not cells:
        return line
    body = '|'.join(f' {cell.strip()} ' if cell.strip() else ' ' for cell in cells)
    return f'{indent}|{body}|'


def _wrap_paragraph(line, width):
    """Break *line* at spaces into pieces of at most *width* characters.

    Every piece keeps the original leading indentation. A single word longer
    than *width* is emitted on its own line unbroken, and a break is never
    placed before a word that would start a new markdown block.
    """
    stripped = line.lstrip(' \t')
    indent = line[:len(line) - len(stripped)]
    pieces = []
    current = ''
    for word in stripped.split(' '):
        candidate = f'{current} {word}' if current else word
        fits = len(indent) + len(candidate) <= width
        if fits or not current or _BLOCK_START_WORD_RE.match(word):
            current = candidate
        else:
            # Drop spaces left at the break so no hard line break is created.
            pieces.append(indent + current.rstrip(' '))
            current = word
    if current:
        pieces.append(indent + current)
    return pieces


def _closes_fence(fence, open_fence):
    """Tell whether the fence-shaped line *fence* closes *open_fence*."""
    char, length, indent = open_fence
    marker = fence.group('marker')
    if marker[0] != char or len(marker) < length:
        return False
    if not fence.group('rest').strip():
        return True
    # A fence-shaped line with trailing text at the opening fence's own
    # indentation is treated as a malformed closer (e.g. "```text") to repair.
    return fence.group('indent') == indent


def _fix_markdown_text(content, max_line_length):
    """Return *content* with fence, line-length, table and bare-URL fixes applied."""
    fixed_lines = []
    open_fence = None  # (marker char, marker length, indent) while inside a fence
    for line in content.split('\n'):
        fence = _FENCE_RE.match(line)

        if open_fence is not None:
            if fence and _closes_fence(fence, open_fence):
                # Closing fences never carry a language; keep indent and marker.
                fixed_lines.append(fence.group('indent') + fence.group('marker'))
                open_fence = None
            else:
                # Fence content is copied verbatim.
                fixed_lines.append(line)
            continue

        if fence:
            indent, marker, rest = fence.group('indent', 'marker', 'rest')
            # Backticks after a backtick run mean an inline code span such as
            # ```x```, not an opening fence.
            if not (marker[0] == '`' and '`' in rest):
                fixed_lines.append(line if rest.strip() else f'{indent}{marker}text')
                open_fence = (marker[0], len(marker), indent)
                continue

        stripped = line.lstrip(' \t')
        if (
            len(line) > max_line_length
            and not stripped.startswith(('|', '>'))
            and not _HEADING_RE.match(line)
        ):
            # MD013: wrap long paragraph lines (tables, quotes, headings excluded).
            fixed_lines.extend(_wrap_paragraph(line, max_line_length))
        elif stripped.startswith('|'):
            # MD060: only real table rows are reformatted.
            fixed_lines.append(_normalize_table_row(line))
        elif 'https://' in line or 'http://' in line:
            # MD034: on lines containing a URL, wrap bare URLs as links and
            # bare e-mail addresses in backticks.
            fixed_lines.append(_INLINE_TOKEN_RE.sub(_fix_inline_token, line))
        else:
            fixed_lines.append(line)
    return '\n'.join(fixed_lines)


def fix_file(filepath, max_line_length=150):
    """Fix markdown lint errors in a file while preserving fence correctness.

    The file is read as UTF-8, processed line by line, and rewritten in place
    only when the fixed text differs from what was read.

    Fixes applied outside fenced code blocks:
      * MD013 - paragraph lines longer than ``max_line_length`` are wrapped at
        spaces; table rows, block quotes and headings are left alone.
      * MD060 - lines starting with ``|`` get one space around every cell.
        The operation is idempotent and never touches prose containing a pipe.
      * MD034 - on lines containing ``http://`` or ``https://``, bare URLs
        become ``[url](url)`` and bare e-mail addresses are wrapped in
        backticks. Code spans, existing links, ``<...>`` autolinks and
        reference definitions are not modified.

    Fence handling: an opening fence without a language gets ``text``; a
    closing fence keeps its own indentation and marker run and never carries
    a language. Backtick and tilde fences of any length >= 3 are tracked, and
    a fence is only closed by a run of the same character at least as long as
    the opening run.

    Args:
        filepath: Path of the markdown file to fix (str or path-like).
        max_line_length: Maximum length of a paragraph line before wrapping.

    Returns:
        True once the file has been processed. I/O and decoding errors
        propagate to the caller.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    result = _fix_markdown_text(content, max_line_length)
    if result != content:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(result)
    return True
def main():
    """Run the markdown fixer over the known list of documentation files.

    Each path is resolved under ``provisioning/docs/src`` (relative to the
    current working directory). A line is printed per file: fixed, failed, or
    not found, followed by a summary.

    Returns:
        0 when every listed file was fixed, 1 otherwise.
    """
    base_dir = Path('provisioning/docs/src')

    targets = [
        # ADR files with errors
        'architecture/adr/adr-016-schema-driven-accessor-generation.md',
        'architecture/adr/adr-017-plugin-wrapper-abstraction-framework.md',
        'architecture/adr/adr-018-help-system-fluent-integration.md',
        'architecture/adr/adr-019-configuration-loader-modularization.md',
        'architecture/adr/adr-020-command-handler-domain-splitting.md',
        # Getting started files with errors
        'getting-started/setup-profiles.md',
        'getting-started/setup.md',
        # Other files with errors
        'guides/internationalization-system.md',
        'roadmap/ai-integration.md',
        'roadmap/nickel-workflows.md',
    ]

    fixed = 0
    for rel_path in targets:
        target = base_dir / rel_path
        if not target.exists():
            print(f"⚠ File not found: {rel_path}")
            continue
        # Report a failure and keep going so one bad file does not stop the run.
        try:
            fix_file(target)
            print(f"✓ Fixed {rel_path}")
            fixed += 1
        except Exception as exc:
            print(f"✗ Error fixing {rel_path}: {exc}")

    print(f"\n✓ Fixed {fixed}/{len(targets)} files")
    return 1 if fixed != len(targets) else 0
# Script entry point: exit with main()'s status code (0 = all files fixed).
if __name__ == '__main__':
    raise SystemExit(main())