Give a warning for unclosed HTML tags

This replaces the "internal error: HTML tag stack out of sync" error
with a warning that tells the user which HTML end tag was unexpected
and in which file, so they know that the HTML tags are unbalanced. In
the future this will be a denyable lint.

This is a very primitive approach: the unexpected end tag is simply
ignored. Ideally the code should recover using the standard HTML
parsing algorithm, since in certain unbalanced situations ignoring the
tag may lead to a cascade of further errors.
This commit is contained in:
Eric Huss 2025-11-05 11:42:43 -08:00
parent 5905bf1d85
commit 22065ebc79
2 changed files with 18 additions and 21 deletions

View file

@ -19,7 +19,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag, Tag
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::ops::Deref; use std::ops::Deref;
use tracing::{error, trace, warn}; use tracing::{trace, warn};
/// Helper to create a [`QualName`]. /// Helper to create a [`QualName`].
macro_rules! attr_qual_name { macro_rules! attr_qual_name {
@ -664,9 +664,18 @@ where
*is_raw = false; *is_raw = false;
if self.is_html_tag_matching(&tag.name) { if self.is_html_tag_matching(&tag.name) {
self.pop(); self.pop();
} else {
// The proper thing to do here is to recover. However, the HTML
// parsing algorithm for that is quite complex. See
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
// and the adoption agency algorithm.
warn!(
"unexpected HTML end tag `</{}>` found in `{}`\n\
Check that the HTML tags are properly balanced.",
tag.name,
self.options.path.display()
);
} }
// else the stack is corrupt. I'm not really sure
// what to do here...
} }
/// This is used to verify HTML parsing keeps the stack of tags in sync. /// This is used to verify HTML parsing keeps the stack of tags in sync.
@ -675,16 +684,10 @@ where
if let Node::Element(el) = current if let Node::Element(el) = current
&& el.name() == name && el.name() == name
{ {
return true; true
} else {
false
} }
error!(
"internal error: HTML tag stack out of sync.\n
path: `{}`\n\
current={current:?}\n\
pop name: {name}",
self.options.path.display()
);
false
} }
/// Eats all pulldown-cmark events until the next `End` matching the /// Eats all pulldown-cmark events until the next `End` matching the

View file

@ -274,18 +274,12 @@ fn unbalanced_html_tags() {
cmd.expect_stderr(str![[r#" cmd.expect_stderr(str![[r#"
INFO Book building has started INFO Book building has started
INFO Running the html backend INFO Running the html backend
ERROR internal error: HTML tag stack out of sync. WARN unexpected HTML end tag `</div>` found in `chapter_1.md`
Check that the HTML tags are properly balanced.
path: `chapter_1.md`
current=Element(Element { name: QualName { prefix: None, ns: Atom('http://www.w3.org/1999/xhtml' type=static), local: Atom('span' type=inline) }, attrs: {}, self_closing: false, was_raw: true })
pop name: div
WARN unclosed HTML tag `<div>` found in `chapter_1.md` WARN unclosed HTML tag `<div>` found in `chapter_1.md`
INFO HTML book written to `[ROOT]/book` INFO HTML book written to `[ROOT]/book`
"#]]); "#]]);
}) })
.check_main_file( .check_main_file("book/chapter_1.html", str!["<div>x<span>foo</span></div>"]);
"book/chapter_1.html",
str!["<div>x<span>foo</span></div>"],
);
} }