From 054da77b6af4b9a07ac852aceac267b1d8daebf6 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 6 Nov 2025 07:10:48 -0800 Subject: [PATCH 1/3] Add test for unbalanced html in a header This is unexpectedly panicking. --- tests/testsuite/rendering.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index e0e77039..ea75fbd0 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -283,3 +283,25 @@ Check that the HTML tags are properly balanced. }) .check_main_file("book/chapter_1.html", str!["
xfoo
"]); } + +// Test for bug with unbalanced HTML handling in the heading. +#[test] +fn heading_with_unbalanced_html() { + BookTest::init(|_| {}) + .change_file("src/chapter_1.md", "### Option") + .run("build", |cmd| { + cmd.expect_failure().expect_stderr(str![[r#" + INFO Book building has started + INFO Running the html backend + +thread 'main' ([..]) panicked at crates/mdbook-html/src/html/tree.rs:[..] +internal error: expected empty tag stack. + + path: `chapter_1.md` +element=Element { name: QualName { prefix: None, ns: Atom('http://www.w3.org/1999/xhtml' type=static), local: Atom('h3' type=inline) }, attrs: {}, self_closing: false, was_raw: false } +note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace + +"#]]); + }); + // .check_main_file("book/chapter_1.html", str![[""]]); +} From 152132458e7bf405839a16d33b03d260c2a5387e Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 6 Nov 2025 07:13:58 -0800 Subject: [PATCH 2/3] Move end tag handling to a function This is to reduce the size of the processing function. --- crates/mdbook-html/src/html/tree.rs | 40 +++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/crates/mdbook-html/src/html/tree.rs b/crates/mdbook-html/src/html/tree.rs index 432cf7be..e4da7ef6 100644 --- a/crates/mdbook-html/src/html/tree.rs +++ b/crates/mdbook-html/src/html/tree.rs @@ -306,25 +306,7 @@ where trace!("event={event:?}"); match event { Event::Start(tag) => self.start_tag(tag), - Event::End(tag) => { - // TODO: This should validate that the event stack is - // properly synchronized with the tag stack. 
- self.pop(); - match tag { - TagEnd::TableHead => { - self.table_state = TableState::Body; - self.push(Node::Element(Element::new("tbody"))); - } - TagEnd::TableCell => { - self.table_cell_index += 1; - } - TagEnd::Table => { - // Pop tbody or thead - self.pop(); - } - _ => {} - } - } + Event::End(tag) => self.end_tag(tag), Event::Text(text) => { self.append_text(text.into_tendril()); } @@ -600,6 +582,26 @@ where self.push(Node::Element(element)); } + fn end_tag(&mut self, tag: TagEnd) { + // TODO: This should validate that the event stack is + // properly synchronized with the tag stack. + self.pop(); + match tag { + TagEnd::TableHead => { + self.table_state = TableState::Body; + self.push(Node::Element(Element::new("tbody"))); + } + TagEnd::TableCell => { + self.table_cell_index += 1; + } + TagEnd::Table => { + // Pop tbody or thead + self.pop(); + } + _ => {} + } + } + /// Given some HTML, parse it into [`Node`] elements and append them to /// the current node. fn append_html(&mut self, html: &str) { From 700839f77f9b87ee3663f2621d64cec299ec772d Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 6 Nov 2025 07:31:45 -0800 Subject: [PATCH 3/3] Handle unclosed HTML tags inside a markdown element This fixes an issue where it was panicking due to an unbalanced HTML tag when exiting a markdown element. The problem was that the tag stack was left non-empty when processing was finished due to `end_tag` being out of sync with the pulldown-cmark event tags. There really should be better validation that the stack is in sync and balanced, but this should address the main culprit of the interplay of raw HTML tags and pulldown-cmark events. 
--- crates/mdbook-html/src/html/tree.rs | 24 ++++++++++++++++++++++-- tests/testsuite/rendering.rs | 19 +++++++++---------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/crates/mdbook-html/src/html/tree.rs b/crates/mdbook-html/src/html/tree.rs index e4da7ef6..0c7c2e4d 100644 --- a/crates/mdbook-html/src/html/tree.rs +++ b/crates/mdbook-html/src/html/tree.rs @@ -583,8 +583,28 @@ where } fn end_tag(&mut self, tag: TagEnd) { - // TODO: This should validate that the event stack is - // properly synchronized with the tag stack. + // TODO: This should validate that the event stack is properly + // synchronized with the tag stack. That would likely require keeping + // a parallel "expected end tag" with the tag stack, since mapping a + // pulldown-cmark event tag to an HTML tag isn't always clear. + // + // Check for unclosed HTML tags when exiting a markdown event. + while let Some(node_id) = self.tag_stack.last() { + let node = self.tree.get(*node_id).unwrap().value(); + let Node::Element(el) = node else { + break; + }; + if !el.was_raw { + break; + } + warn!( + "unclosed HTML tag `<{}>` found in `{}` while exiting {tag:?}\n\ + HTML tags must be closed before exiting a markdown element.", + el.name.local, + self.options.path.display(), + ); + self.pop(); + } self.pop(); match tag { TagEnd::TableHead => { diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index ea75fbd0..421aa5d4 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -290,18 +290,17 @@ fn heading_with_unbalanced_html() { BookTest::init(|_| {}) .change_file("src/chapter_1.md", "### Option") .run("build", |cmd| { - cmd.expect_failure().expect_stderr(str![[r#" + cmd.expect_stderr(str![[r#" INFO Book building has started INFO Running the html backend - -thread 'main' ([..]) panicked at crates/mdbook-html/src/html/tree.rs:[..] -internal error: expected empty tag stack. 
- - path: `chapter_1.md` -element=Element { name: QualName { prefix: None, ns: Atom('http://www.w3.org/1999/xhtml' type=static), local: Atom('h3' type=inline) }, attrs: {}, self_closing: false, was_raw: false } -note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace + WARN unclosed HTML tag `<t>` found in `chapter_1.md` while exiting Heading(H3) +HTML tags must be closed before exiting a markdown element. + INFO HTML book written to `[ROOT]/book` "#]]); - }); - // .check_main_file("book/chapter_1.html", str![[""]]); + }) + .check_main_file( + "book/chapter_1.html", + str![[r##"

Option

"##]], + ); }