mdbook/crates/mdbook-html/src/html/tree.rs
Eric Huss ba4c3ed873 Add support for definition lists
This enables the definition lists support from pulldown-cmark.
This includes a config option in case it causes problems with existing
books.

Closes https://github.com/rust-lang/mdBook/issues/2770
2025-09-17 16:44:45 -07:00

1057 lines
39 KiB
Rust

//! Tree data structure for representing a markdown document.
//!
//! [`MarkdownTreeBuilder::build`] is the primary entry point of this module.
//! It takes events from [`pulldown_cmark`], and generates a [`Tree`]
//! structure of [`Node`] elements. It also handles all the various
//! transformations that mdbook performs, such as creating header links.
use super::tokenizer::parse_html;
use super::{HtmlRenderOptions, hide_lines, wrap_rust_main};
use crate::utils::{id_from_content, unique_id};
use ego_tree::{NodeId, NodeRef, Tree};
use html5ever::tendril::StrTendril;
use html5ever::tokenizer::{TagKind, Token};
use html5ever::{LocalName, QualName};
use indexmap::IndexMap;
use mdbook_core::config::RustEdition;
use mdbook_core::static_regex;
use pulldown_cmark::{
Alignment, BlockQuoteKind, CodeBlockKind, CowStr, Event, LinkType, Tag, TagEnd,
};
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use tracing::{error, warn};
/// Builds the [`QualName`] used as an attribute key.
///
/// The name is created without a prefix, with the namespace produced by
/// `html5ever::ns!()`, and with `$name` converted into a [`LocalName`].
macro_rules! attr_qual_name {
    ($name:expr) => {{
        let local = LocalName::from($name);
        QualName::new(None, html5ever::ns!(), local)
    }};
}
/// A node in the [`Tree`].
///
/// Every entry in the document tree is exactly one of these variants.
#[derive(Debug)]
pub(crate) enum Node {
/// An HTML [`Element`].
Element(Element),
/// Plain text.
///
/// This will be escaped when serialized.
Text(StrTendril),
/// An HTML comment.
Comment(StrTendril),
/// Root node of a tree fragment.
///
/// This is a general purpose node whenever it is convenient to have a
/// container of other nodes.
Fragment,
/// Raw data that should be copied into the output as-is without escaping.
RawData(StrTendril),
}
impl Node {
    /// Borrows the contained [`Element`], or returns `None` for every other
    /// variant.
    pub(crate) fn as_element(&self) -> Option<&Element> {
        match self {
            Node::Element(element) => Some(element),
            _ => None,
        }
    }
    /// Mutably borrows the contained [`Element`], or returns `None` for
    /// every other variant.
    fn as_element_mut(&mut self) -> Option<&mut Element> {
        match self {
            Node::Element(element) => Some(element),
            _ => None,
        }
    }
}
/// An HTML element.
///
/// Holds the tag name, its attributes, and whether the tag was written in
/// self-closing form.
#[derive(Debug)]
pub(crate) struct Element {
/// The tag name.
pub(crate) name: QualName,
/// Element attributes.
pub(crate) attrs: Attributes,
/// True if this tag ends with `/>`.
pub(crate) self_closing: bool,
}
impl Element {
    /// Creates an element named `tag_name` with no attributes that is not
    /// self-closing.
    pub(crate) fn new(tag_name: &str) -> Element {
        Element {
            name: QualName::new(None, html5ever::ns!(html), LocalName::from(tag_name)),
            attrs: Attributes::new(),
            self_closing: false,
        }
    }
    /// The local tag name of this element, such as `p` or `h1`.
    pub(crate) fn name(&self) -> &str {
        &self.name.local
    }
    /// Returns the heading level (1 through 6) when this element is one of
    /// `h1`..`h6`, and `None` for any other element.
    #[allow(dead_code, reason = "currently only used in search")]
    pub(crate) fn heading_level(&self) -> Option<u8> {
        match self.name() {
            "h1" => Some(1),
            "h2" => Some(2),
            "h3" => Some(3),
            "h4" => Some(4),
            "h5" => Some(5),
            "h6" => Some(6),
            _ => None,
        }
    }
    /// Looks up the attribute called `name`, returning its value if present.
    pub(crate) fn attr(&self, name: &str) -> Option<&str> {
        self.attrs
            .get(&attr_qual_name!(name))
            .map(|value| &**value)
    }
    /// Sets the attribute called `name` to `value`.
    ///
    /// An existing attribute with the same name has its value replaced.
    pub(crate) fn insert_attr(&mut self, name: &str, value: StrTendril) {
        self.attrs.insert(attr_qual_name!(name), value);
    }
}
/// A map of attributes on an [`Element`].
///
/// Keys are attribute names and values are the attribute text. An
/// [`IndexMap`] is used, so iteration follows insertion order.
type Attributes = IndexMap<QualName, StrTendril>;
/// Helper to convert [`CowStr`] to a [`StrTendril`].
///
/// This is a local trait so the conversion can be called as a method.
trait ToTendril {
/// Converts self to a [`StrTendril`].
///
/// Consumes `self`.
fn into_tendril(self) -> StrTendril;
}
impl ToTendril for CowStr<'_> {
    /// Converts each [`CowStr`] representation into a [`StrTendril`].
    fn into_tendril(self) -> StrTendril {
        match self {
            CowStr::Borrowed(borrowed) => StrTendril::from(borrowed),
            CowStr::Inlined(inlined) => StrTendril::from(inlined.as_ref()),
            // Turn the boxed string into a `String` first so the tendril is
            // built from an owned value.
            CowStr::Boxed(boxed) => StrTendril::from(String::from(boxed)),
        }
    }
}
/// Tracks the current state of parsing a table.
///
/// This is used to determine if it should generate `<th>` or `<td>` tags.
enum TableState {
/// Currently in the table head.
///
/// Cells started in this state become `<th>` elements.
Head,
/// Currently in the table body.
///
/// Cells started in this state become `<td>` elements.
Body,
}
/// A builder used to create a [`Tree`] of [`Node`] elements.
///
/// Parts of this are based on pulldown-cmark's serializer (like table handling).
pub(crate) struct MarkdownTreeBuilder<'opts, 'event, EventIter> {
/// [`pulldown_cmark`] iterator of [`pulldown_cmark::Event`] elements.
events: EventIter,
/// Options for how to generate the HTML.
options: &'opts HtmlRenderOptions<'opts>,
/// The tree that is being built.
tree: Tree<Node>,
/// The ID of the current [`Node`].
///
/// New nodes are appended as children of this node.
current_node: NodeId,
/// The tag stack.
///
/// This is used to set the `current_node` as the parser enters and leaves
/// a tag.
tag_stack: Vec<NodeId>,
/// When parsing a table, whether or not we are currently in the head or
/// the body.
table_state: TableState,
/// When parsing a table, the alignments of the columns.
///
/// The count should match the number of columns.
table_alignments: Vec<Alignment>,
/// When parsing a table, the index of the current column.
table_cell_index: usize,
/// Mapping of footnote numbers.
///
/// This is used for generating linkbacks in the definitions.
///
/// This is a map of `name -> (number, count)`.
///
/// - `name` is the name of the footnote.
/// - `number` is the footnote number displayed in the output.
/// - `count` is the number of references to this footnote (used for multiple
/// linkbacks, and checking for unused footnotes).
footnote_numbers: HashMap<CowStr<'event>, (usize, u32)>,
/// Footnote definitions.
///
/// This is a map of `name -> NodeId` of each footnote definition. When
/// parsing footnotes, they are initially left in the position where they
/// were defined as an `<li>` tag. The [`NodeId`] here is the id of that
/// tag. After the document has been parsed, all the definitions are moved
/// to the end of the document.
footnote_defs: HashMap<CowStr<'event>, NodeId>,
}
impl<'opts, 'event, EventIter> MarkdownTreeBuilder<'opts, 'event, EventIter>
where
EventIter: Iterator<Item = Event<'event>>,
{
/// Consumes a [`pulldown_cmark`] iterator of [`pulldown_cmark::Event`]
/// values and returns the finished tree of [`Node`] values.
///
/// The events are processed first; the post-processing passes (header
/// links, code blocks, Font Awesome icons) then run in that order.
pub(crate) fn build(options: &'opts HtmlRenderOptions<'opts>, events: EventIter) -> Tree<Node> {
    let tree = Tree::new(Node::Fragment);
    let root_id = tree.root().id();
    let mut this = Self {
        events,
        options,
        tree,
        // The root fragment is both the initial insertion point and the
        // bottom of the tag stack.
        current_node: root_id,
        tag_stack: vec![root_id],
        table_state: TableState::Head,
        table_alignments: Vec::new(),
        table_cell_index: 0,
        footnote_numbers: HashMap::new(),
        footnote_defs: HashMap::new(),
    };
    this.process_events();
    this.add_header_links();
    this.update_code_blocks();
    this.convert_fontawesome();
    this.tree
}
/// Adds `node` as the last child of the current node.
///
/// The current node is not changed. Returns the [`NodeId`] of the new node.
fn append(&mut self, node: Node) -> NodeId {
    let mut parent = self.tree.get_mut(self.current_node).unwrap();
    let child = parent.append(node);
    child.id()
}
/// Adds text to the current node.
///
/// When the current node's last child is already a text node, the new text
/// is pushed onto it instead of creating a second text node. This makes
/// some processing more convenient.
fn append_text(&mut self, text: StrTendril) {
    let mut parent = self.tree.get_mut(self.current_node).unwrap();
    if let Some(mut last) = parent.last_child() {
        if let Node::Text(existing) = last.value() {
            existing.push_slice(&text);
            return;
        }
    }
    self.append(Node::Text(text));
}
/// Adds `node` as a child of the current node, records it on the tag stack,
/// and makes it the current node.
///
/// Use this only when a matching `pop` is expected.
fn push(&mut self, node: Node) {
    let child = self.append(node);
    self.current_node = child;
    self.tag_stack.push(child);
}
/// Adds `node` as a child of the current node and makes it the current
/// node, without recording it on the tag stack.
///
/// Unlike `push`, no matching `pop` is expected for this node. The next
/// call to `pop` will unwind the stack past it.
fn push_no_stack(&mut self, node: Node) {
    self.current_node = self.append(node);
}
/// Drops the top of the tag stack and makes the new top the current node.
///
/// # Panics
///
/// Panics if the tag stack is empty after the pop.
fn pop(&mut self) {
    self.tag_stack.pop();
    match self.tag_stack.last().copied() {
        Some(parent) => self.current_node = parent,
        None => panic!("pop too far processing `{}`", self.options.path.display()),
    }
}
/// Collects the [`NodeId`] of every [`Element`] node in the tree whose tag
/// name is accepted by `filter`.
///
/// Non-element nodes are never included.
fn node_ids_for_tag(&self, filter: &dyn Fn(&str) -> bool) -> Vec<NodeId> {
    self.tree
        .nodes()
        .filter_map(|node| match node.value() {
            Node::Element(el) if filter(el.name()) => Some(node.id()),
            _ => None,
        })
        .collect()
}
/// The main processing loop. Processes all events until the end.
///
/// Each event is turned into nodes appended at the current position. Once
/// the event iterator is exhausted, the footnote definitions are collected.
fn process_events(&mut self) {
while let Some(event) = self.events.next() {
match event {
Event::Start(tag) => self.start_tag(tag),
Event::End(tag) => {
// Every end tag seen here leaves the element that its start tag pushed.
self.pop();
match tag {
TagEnd::TableHead => {
// Rows after the head go into a `<tbody>`.
self.table_state = TableState::Body;
self.push(Node::Element(Element::new("tbody")));
}
TagEnd::TableCell => {
self.table_cell_index += 1;
}
TagEnd::Table => {
// Pop tbody or thead
self.pop();
}
_ => {}
}
}
Event::Text(text) => {
self.append_text(text.into_tendril());
}
Event::Code(code) => {
self.push(Node::Element(Element::new("code")));
self.append(Node::Text(code.into_tendril()));
self.pop();
}
Event::InlineMath(text) => {
let mut span = Element::new("span");
span.insert_attr("class", "math math-inline".into());
self.push(Node::Element(span));
self.append(Node::Text(text.into_tendril()));
self.pop();
}
Event::DisplayMath(text) => {
let mut span = Element::new("span");
span.insert_attr("class", "math math-display".into());
self.push(Node::Element(span));
self.append(Node::Text(text.into_tendril()));
self.pop();
}
Event::Html(html) => {
// The loop in Tag::HtmlBlock should have consumed all
// Html events.
panic!(
"`{}` unexpected Html event: {html}",
self.options.path.display()
);
}
Event::InlineHtml(html) => self.append_html(&html),
Event::FootnoteReference(name) => self.footnote_reference(name),
Event::SoftBreak => {
self.append_text("\n".into());
}
Event::HardBreak => {
self.append(Node::Element(Element::new("br")));
}
Event::Rule => {
self.append(Node::Element(Element::new("hr")));
}
Event::TaskListMarker(checked) => {
let mut input = Element::new("input");
input.insert_attr("disabled", "".into());
input.insert_attr("type", "checkbox".into());
if checked {
input.insert_attr("checked", "".into());
}
self.push(Node::Element(input));
// Add some space before whatever follows.
// NOTE(review): this text node is appended while `input` is the current
// node, so it becomes a child of `<input>` — confirm this is intended.
self.append(Node::Text(" ".into()));
self.pop();
}
}
}
self.collect_footnote_defs();
}
/// Handles a `Start` event for `tag`.
///
/// Most tags map to a single [`Element`], which is pushed as the new
/// current node at the end of this function. Some tags first insert a
/// wrapper with `push_no_stack` (`<pre>`, the table wrapper `<div>`,
/// `<thead>`). Others consume events themselves and return early without
/// reaching the final `push` (HTML blocks, footnote definitions, images,
/// metadata blocks).
fn start_tag(&mut self, tag: Tag<'event>) {
let element = match tag {
Tag::Paragraph => Element::new("p"),
Tag::Heading {
level,
id,
classes,
attrs,
} => {
// The string form of `level` is used as the tag name.
let mut el = Element::new(&level.to_string());
for (name, value) in attrs {
let name =
QualName::new(None, html5ever::ns!(), LocalName::from(Cow::from(name)));
// An attribute without a value gets an empty string.
let value = value.unwrap_or_else(|| CowStr::from(""));
el.attrs.insert(name, value.into_tendril());
}
if let Some(id) = id {
el.insert_attr("id", id.into_tendril());
}
if !classes.is_empty() {
let classes = classes.join(" ");
el.insert_attr("class", classes.into());
}
el
}
Tag::BlockQuote(kind) => {
let mut b = Element::new("blockquote");
if let Some(kind) = kind {
let class = match kind {
BlockQuoteKind::Note => "markdown-alert-note",
BlockQuoteKind::Tip => "markdown-alert-tip",
BlockQuoteKind::Important => "markdown-alert-important",
BlockQuoteKind::Warning => "markdown-alert-warning",
BlockQuoteKind::Caution => "markdown-alert-caution",
};
b.insert_attr("class", class.into());
}
b
}
Tag::CodeBlock(kind) => {
let mut code = Element::new("code");
match kind {
CodeBlockKind::Fenced(info) => {
// The info string is split on spaces, tabs and commas; empty pieces are dropped.
let mut infos =
info.split([' ', '\t', ',']).filter(|info| !info.is_empty());
if let Some(lang) = infos.next() {
let mut classes = String::with_capacity(info.len() + 10);
// The first element in the infostring is treated as the language.
classes.push_str("language-");
classes.push_str(lang);
// The rest are just added as classes.
while let Some(info) = infos.next() {
classes.push(' ');
classes.push_str(info);
}
code.insert_attr("class", classes.into());
}
}
CodeBlockKind::Indented => {}
}
// The `<code>` element is wrapped in a `<pre>` that is not tracked on the tag stack.
self.push_no_stack(Node::Element(Element::new("pre")));
code
}
Tag::HtmlBlock => {
// To process the HTML correctly, this needs to
// collect it all into a single string.
let mut html = String::new();
while let Some(event) = self.events.next() {
match event {
Event::Html(text) | Event::Text(text) => html.push_str(&text),
Event::End(TagEnd::HtmlBlock) => break,
_ => panic!(
"`{}` unexpected event in html block {event:?}",
self.options.path.display()
),
}
}
self.append_html(&html);
// TagEnd::HtmlBlock must not pop.
return;
}
Tag::List(Some(start)) => {
let mut ol = Element::new("ol");
// The `start` attribute is only written when the list does not begin at 1.
if start != 1 {
ol.insert_attr("start", format!("{start}").into());
}
ol
}
Tag::List(None) => Element::new("ul"),
Tag::Item => Element::new("li"),
Tag::FootnoteDefinition(name) => {
if self.footnote_defs.contains_key(&name) {
warn!(
"footnote `{name}` in {} defined multiple times - \
not updating to new definition",
self.options.path.display()
);
// Discard the duplicate definition, including its end tag.
self.eat_till_end();
return;
} else {
let mut el = Element::new("li");
el.insert_attr("id", format!("footnote-{name}").into());
self.push(Node::Element(el));
// After `push`, `current_node` is the new `<li>`.
self.footnote_defs.insert(name, self.current_node);
return;
}
}
Tag::DefinitionList => Element::new("dl"),
Tag::DefinitionListTitle => Element::new("dt"),
Tag::DefinitionListDefinition => Element::new("dd"),
Tag::Table(alignments) => {
self.table_alignments = alignments.clone();
// This div wrapper around the table is used to apply
// `overflow-x: auto` so that wide tables can be scrolled
// horizontally, rather than overflowing or scrolling the
// entire page. See
// https://github.com/rust-lang/mdBook/pull/1617
let mut div = Element::new("div");
div.insert_attr("class", "table-wrapper".into());
self.push_no_stack(Node::Element(div));
Element::new("table")
}
Tag::TableHead => {
self.table_state = TableState::Head;
self.table_cell_index = 0;
let thead = Element::new("thead");
self.push_no_stack(Node::Element(thead));
Element::new("tr")
}
Tag::TableRow => {
self.table_cell_index = 0;
Element::new("tr")
}
Tag::TableCell => {
let mut cell = match self.table_state {
TableState::Head => Element::new("th"),
TableState::Body => Element::new("td"),
};
// A missing alignment entry is treated the same as `Alignment::None`.
let style = match self.table_alignments.get(self.table_cell_index) {
Some(&Alignment::Left) => "text-align: left",
Some(&Alignment::Center) => "text-align: center",
Some(&Alignment::Right) => "text-align: right",
Some(&Alignment::None) | None => "",
};
if !style.is_empty() {
cell.insert_attr("style", style.into());
}
cell
}
Tag::Emphasis => Element::new("em"),
Tag::Strong => Element::new("strong"),
Tag::Strikethrough => Element::new("del"),
Tag::Superscript => Element::new("sup"),
Tag::Subscript => Element::new("sub"),
Tag::Link {
link_type,
dest_url,
title,
id: _,
} => {
// Email links get a `mailto:` prefix; all other links go through `fix_link`.
let href: StrTendril = if matches!(link_type, LinkType::Email) {
format!("mailto:{dest_url}").into()
} else {
fix_link(dest_url).into_tendril()
};
let mut a = Element::new("a");
a.insert_attr("href", href);
if !title.is_empty() {
a.insert_attr("title", title.into_tendril());
}
a
}
Tag::Image {
link_type: _,
dest_url,
title,
id: _,
} => {
let mut img = Element::new("img");
let src = fix_link(dest_url).into_tendril();
img.insert_attr("src", src);
if !title.is_empty() {
img.insert_attr("title", title.into_tendril());
}
// This will eat TagEnd::Image
let alt = self.text_for_img_alt();
img.insert_attr("alt", alt.into());
// The image is appended rather than pushed because its end tag has already been consumed.
self.append(Node::Element(img));
return;
}
Tag::MetadataBlock(_) => {
// Eat all events till the end of MetadataBlock.
while let Some(event) = self.events.next() {
if matches!(event, Event::End(TagEnd::MetadataBlock(_))) {
break;
}
}
return;
}
};
self.push(Node::Element(element));
}
/// Given some HTML, parse it into [`Node`] elements and append them to
/// the current node.
///
/// Start tags are pushed onto the tag stack and matching end tags pop them,
/// so an element left open here stays the current node after this returns.
/// Parse errors are logged as warnings.
fn append_html(&mut self, html: &str) {
let tokens = parse_html(&html);
// True while the most recent tag token was a `<script>` or `<style>` start tag.
let mut is_raw = false;
for token in tokens {
match token {
Token::DoctypeToken(_) => {}
Token::TagToken(tag) => {
match tag.kind {
TagKind::StartTag => {
let is_closed = is_void_element(&tag.name) || tag.self_closing;
is_raw = matches!(&*tag.name, "script" | "style");
let name = QualName::new(None, html5ever::ns!(html), tag.name);
let attrs = tag
.attrs
.into_iter()
.map(|attr| (attr.name, attr.value))
.collect();
let mut el = Element {
name,
attrs,
self_closing: tag.self_closing,
};
fix_html_link(&mut el);
self.push(Node::Element(el));
if is_closed {
// No end element.
self.pop();
}
}
TagKind::EndTag => {
is_raw = false;
// Only pop when the end tag matches the current element.
if self.is_html_tag_matching(&tag.name) {
self.pop();
}
// else the stack is corrupt. I'm not really sure
// what to do here...
}
}
}
Token::CommentToken(comment) => {
self.append(Node::Comment(comment));
}
Token::CharacterTokens(chars) => {
if is_raw {
// Kept as raw data so it is not escaped on output.
self.append(Node::RawData(chars));
} else {
self.append_text(chars);
}
}
Token::NullCharacterToken => {}
Token::EOFToken => {}
Token::ParseError(error) => {
warn!(
"html parse error in `{}`: {error}\n\
Html text was:\n\
{html}",
self.options.path.display()
);
}
}
}
}
/// This is used to verify HTML parsing keeps the stack of tags in sync.
///
/// Returns `true` when the current node is an element whose tag name equals
/// `name`. Otherwise an error is logged and `false` is returned.
fn is_html_tag_matching(&self, name: &str) -> bool {
    let current = self.tree.get(self.current_node).unwrap().value();
    if let Node::Element(el) = current
        && el.name() == name
    {
        return true;
    }
    // Every line of the literal ends with `\` so that the source line break
    // and indentation are not embedded in the logged message.
    error!(
        "internal error: HTML tag stack out of sync.\n\
         path: `{}`\n\
         current={current:?}\n\
         pop name: {name}",
        self.options.path.display()
    );
    false
}
/// Discards pulldown-cmark events up to and including the `End` event that
/// closes the current nesting level.
///
/// Nested `Start`/`End` pairs seen along the way are skipped as well. If the
/// iterator runs out first, this simply returns.
fn eat_till_end(&mut self) {
    let mut depth = 0;
    for event in self.events.by_ref() {
        match event {
            Event::Start(_) => depth += 1,
            // An `End` at depth zero closes the level we started in.
            Event::End(_) if depth == 0 => break,
            Event::End(_) => depth -= 1,
            _ => {}
        }
    }
}
/// Consumes events up to and including the `End` that closes the current
/// nesting level, and returns their content as plain text.
///
/// Formatting tags are dropped and only their text is kept. Math is wrapped
/// in `$` (inline) or `$$` (display). Breaks and rules become a single
/// space.
fn text_for_img_alt(&mut self) -> String {
    let mut depth = 0;
    let mut alt = String::new();
    for event in self.events.by_ref() {
        match event {
            Event::Start(_) => depth += 1,
            // An `End` at depth zero closes the level we started in.
            Event::End(_) if depth == 0 => break,
            Event::End(_) => depth -= 1,
            Event::InlineHtml(text) | Event::Code(text) | Event::Text(text) => {
                alt.push_str(&text);
            }
            Event::InlineMath(text) => {
                alt.push('$');
                alt.push_str(&text);
                alt.push('$');
            }
            Event::DisplayMath(text) => {
                alt.push_str("$$");
                alt.push_str(&text);
                alt.push_str("$$");
            }
            Event::SoftBreak | Event::HardBreak | Event::Rule => alt.push(' '),
            // These contribute nothing to the text.
            Event::Html(_) => {}
            Event::FootnoteReference(_) => {}
            Event::TaskListMarker(_) => {}
        }
    }
    alt
}
/// Appends a reference to the footnote called `name`.
///
/// The footnote is given the next display number the first time it is
/// referenced, and its reference count is incremented on every call. The
/// output is `<sup class="footnote-reference" id="fr-NAME-COUNT">`
/// containing an `<a href="#footnote-NAME">` with the number as its text.
fn footnote_reference(&mut self, name: CowStr<'event>) {
    let next_number = self.footnote_numbers.len() + 1;
    let entry = self
        .footnote_numbers
        .entry(name.clone())
        .or_insert((next_number, 0));
    entry.1 += 1;
    let (number, count) = *entry;

    // Put a space between two references that sit right next to each other.
    let follows_reference = self
        .tree
        .get(self.current_node)
        .unwrap()
        .last_child()
        .is_some_and(|last| {
            matches!(
                last.value(),
                Node::Element(el) if el.attr("class") == Some("footnote-reference")
            )
        });
    if follows_reference {
        self.append(Node::Text(" ".into()));
    }

    let mut sup = Element::new("sup");
    sup.insert_attr("class", "footnote-reference".into());
    sup.insert_attr("id", format!("fr-{name}-{count}").into());
    self.push(Node::Element(sup));

    let mut link = Element::new("a");
    link.insert_attr("href", format!("#footnote-{name}").into());
    self.push(Node::Element(link));
    self.append(Node::Text(number.to_string().into()));
    self.pop(); // a
    self.pop(); // sup
}
/// This is used after parsing is complete to move the footnote
/// definitions to the end of the document.
///
/// Every definition is detached from where it was defined. Definitions that
/// were never referenced are dropped with a warning. The rest are sorted by
/// footnote number and appended to an `<ol class="footnote-definition">`
/// that follows an `<hr>`. Each definition gets one backlink per reference.
fn collect_footnote_defs(&mut self) {
    if self.footnote_defs.is_empty() {
        return;
    }
    let defs = std::mem::take(&mut self.footnote_defs);
    let mut defs: Vec<_> = defs.into_iter().collect();
    // Detach nodes and remove unused.
    defs.retain(|(name, def_id)| {
        let mut node = self.tree.get_mut(*def_id).unwrap();
        node.detach();
        if !self.footnote_numbers.contains_key(name) {
            warn!(
                "footnote `{name}` in `{}` is defined but not referenced",
                self.options.path.display()
            );
            false
        } else {
            true
        }
    });
    defs.sort_by_cached_key(|(name, _)| self.footnote_numbers[name].0);
    // Move defs to the end of the chapter.
    self.append(Node::Element(Element::new("hr")));
    let mut ol = Element::new("ol");
    ol.insert_attr("class", "footnote-definition".into());
    let ol_id = self.append(Node::Element(ol));
    for (name, def_id) in defs {
        // Generate the linkbacks.
        let count = self.footnote_numbers[&name].1;
        for usage in 1..=count {
            // The first backlink shows only the arrow; later ones also show
            // which reference they jump back to.
            let nth = if usage == 1 {
                String::new()
            } else {
                usage.to_string()
            };
            let space = self.tree.orphan(Node::Text(" ".into())).id();
            let mut backlink = Element::new("a");
            backlink.insert_attr("href", format!("#fr-{name}-{usage}").into());
            let mut backlink = self.tree.orphan(Node::Element(backlink));
            // The link always needs visible text; with `nth` alone the first
            // backlink would be an empty `<a>`.
            backlink.append(Node::Text(format!("↩{nth}").into()));
            let backlink = backlink.id();
            let mut def = self.tree.get_mut(def_id).unwrap();
            if let Some(mut last_child) = def.last_child()
                && let value = last_child.value()
                && let Node::Element(last_el) = value
                && last_el.name() == "p"
            {
                // Put the linkback at the end of the last paragraph instead
                // of on a line by itself.
                last_child.append_id(space);
                last_child.append_id(backlink);
            } else {
                // Not a clear place to put it in this circumstance, so put it
                // at the end.
                def.append_id(space);
                def.append_id(backlink);
            };
        }
        let mut ol = self.tree.get_mut(ol_id).unwrap();
        ol.append_id(def_id);
    }
}
/// Post-processing pass that makes every heading (`h1`..`h6`) and `dt`
/// element linkable.
///
/// An element that already has an `id` keeps it. Otherwise a unique `id` is
/// generated from the element's text. The element's children are then moved
/// into a new `<a class="header" href="#ID">` that becomes its only child,
/// so clicking the element sets the URL fragment to it.
fn add_header_links(&mut self) {
    let mut used_ids = HashSet::new();
    let targets = self.node_ids_for_tag(&|name| {
        matches!(name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "dt")
    });
    for target in targets {
        let existing_href = self
            .tree
            .get(target)
            .unwrap()
            .value()
            .as_element()
            .unwrap()
            .attr("id")
            .map(|id| format!("#{id}"));
        let href = match existing_href {
            Some(href) => href,
            None => {
                // Derive the id from the text content of the element.
                let mut text = String::new();
                text_in_node(self.tree.get(target).unwrap(), &mut text);
                let id = id_from_content(&text);
                let id = unique_id(&id, &mut used_ids);
                let href = format!("#{id}");
                let mut node = self.tree.get_mut(target).unwrap();
                node.value()
                    .as_element_mut()
                    .unwrap()
                    .insert_attr("id", id.into());
                href
            }
        };
        // Insert an <a> element between the element and its children.
        let mut anchor = Element::new("a");
        anchor.insert_attr("class", "header".into());
        anchor.insert_attr("href", href.into());
        let mut anchor = self.tree.orphan(Node::Element(anchor));
        anchor.reparent_from_id_append(target);
        let anchor_id = anchor.id();
        self.tree.get_mut(target).unwrap().append_id(anchor_id);
    }
}
/// This is used after parsing is complete to set the appropriate classes
/// on a code block, to wrap hidden lines in `<span>` tags, and to add an
/// `fn main() {}` wrapper for Rust code blocks.
///
/// Only `<code>` elements with exactly one child are processed. Playground
/// handling (edition class, `main` wrapper, `playground` class on the
/// parent) is applied only when the `<code>` is a direct child of a `<pre>`
/// element. Hidden-line processing runs on every processed `<code>`.
fn update_code_blocks(&mut self) {
    let mut code_ids = self.node_ids_for_tag(&|name| name == "code");
    // The processing below assumes the code block is in a contiguous
    // chunk. The text nodes should have been merged during event
    // processing. I don't know exactly what this should do if it
    // encounters code blocks with non-text nodes.
    code_ids.retain(|id| {
        let code = self.tree.get(*id).unwrap();
        code.children().count() == 1
    });
    for code_id in code_ids.iter().copied() {
        // `<code>` elements also come from inline code and from raw HTML,
        // where the parent may be any element (or the root fragment).
        // Skip those instead of panicking on author-supplied markup.
        let code = self.tree.get(code_id).unwrap();
        let Some(parent) = code.parent() else {
            continue;
        };
        if !matches!(parent.value(), Node::Element(el) if el.name() == "pre") {
            continue;
        }
        let parent_id = parent.id();

        let mut node = self.tree.get_mut(code_id).unwrap();
        let code_el = node.value().as_element_mut().unwrap();
        let class = code_el.attr("class").unwrap_or_default();
        let class_set: HashSet<_> = class.split(' ').collect();
        let is_editable = class_set.contains("editable");
        let is_playground = class_set.contains("language-rust")
            && ((!class_set.contains("ignore")
                && !class_set.contains("noplayground")
                && !class_set.contains("noplaypen")
                && self.options.config.playground.runnable)
                || class_set.contains("mdbook-runnable"));
        if !is_playground {
            continue;
        }
        // Add the configured edition only when the block does not already
        // carry an `edition*` class.
        let add_edition = if class_set.iter().any(|cls| cls.starts_with("edition")) {
            None
        } else {
            self.options.edition.map(|edition| match edition {
                RustEdition::E2015 => "edition2015",
                RustEdition::E2018 => "edition2018",
                RustEdition::E2021 => "edition2021",
                RustEdition::E2024 => "edition2024",
                _ => panic!("edition {edition:?} not covered"),
            })
        };
        if let Some(edition) = add_edition {
            code_el.insert_attr("class", format!("{class} {edition}").into());
        }
        let mut node = self.tree.get_mut(code_id).unwrap();
        // The `main` wrapper is skipped only for editable blocks when
        // editing is enabled in the configuration.
        if !self.options.config.playground.editable || !is_editable {
            if let Some(mut child) = node.first_child()
                && let Node::Text(text) = child.value()
            {
                if let Some(new_text) = wrap_rust_main(text) {
                    *text = new_text.into();
                }
            }
        }
        // The parent was verified above to be a `<pre>` element.
        let mut pre = self.tree.get_mut(parent_id).unwrap();
        let pre = pre.value().as_element_mut().unwrap();
        pre.insert_attr("class", "playground".into());
    }
    for code_id in code_ids {
        hide_lines(&mut self.tree, code_id, &self.options.config.code.hidelines);
    }
}
/// Post-processing pass that turns empty `<i>` icon tags into a `<span>`
/// holding the matching SVG.
///
/// The `fa-NAME` class selects the icon, and `fa` / `fas` / `fab` select
/// the icon type. Every other class is carried over after `fa-svg`, and all
/// non-`class` attributes are copied to the `<span>`. An `<i>` that has
/// children, names no icon, or names an unknown icon is left untouched.
fn convert_fontawesome(&mut self) {
    use font_awesome_as_a_crate as fa;
    let icon_ids = self.node_ids_for_tag(&|name| name == "i");
    for icon_id in icon_ids {
        let mut node = self.tree.get_mut(icon_id).unwrap();
        if node.first_child().is_some() {
            // Just to be safe, only translate <i></i>.
            continue;
        }
        let mut icon = String::new();
        let mut type_ = fa::Type::Regular;
        let mut new_classes = String::from("fa-svg");
        let i_el = node.value().as_element().unwrap();
        for class in i_el.attr("class").unwrap_or_default().split(" ") {
            match class {
                "fa" => type_ = fa::Type::Regular,
                "fas" => type_ = fa::Type::Solid,
                "fab" => type_ = fa::Type::Brands,
                other => match other.strip_prefix("fa-") {
                    Some(icon_name) => icon = icon_name.to_owned(),
                    None => {
                        new_classes += " ";
                        new_classes += other;
                    }
                },
            }
        }
        if icon.is_empty() {
            continue;
        }
        let Ok(svg) = fa::svg(type_, &icon) else {
            continue;
        };
        let mut span = Element::new("span");
        span.insert_attr("class", new_classes.into());
        for (name, value) in &i_el.attrs {
            if *name != attr_qual_name!("class") {
                span.attrs.insert(name.clone(), value.clone());
            }
        }
        // Replace the `<i>` in place and attach the SVG unescaped.
        *node.value() = Node::Element(span);
        node.append(Node::RawData(svg.into()));
    }
}
}
/// Appends the contents of every text node below `node` to `output`,
/// visiting descendants depth-first in document order.
///
/// This is used to generate the `id` of a header.
fn text_in_node(node: NodeRef<'_, Node>, output: &mut String) {
    for child in node.children() {
        // Only text nodes contribute; every kind of node is still descended
        // into.
        if let Node::Text(text) = child.value() {
            output.push_str(text);
        }
        text_in_node(child, output);
    }
}
/// Modifies links to work with HTML.
///
/// For local paths, this changes the `.md` extension to `.html`.
///
/// Fragment-only links (starting with `#`) and links with a URL scheme such
/// as `https:` are returned unchanged. For every other link, only the path
/// portion (everything before the first `?` or `#`) is examined. If it ends
/// in `.md`, that extension is replaced with `.html`, and the query and
/// fragment are kept as they were.
fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> {
    static_regex!(SCHEME_LINK, r"^[a-z][a-z0-9+.-]*:");
    if link.starts_with('#') {
        // Fragment-only link.
        return link;
    }
    // Don't modify links with schemes like `https`.
    if SCHEME_LINK.is_match(&link) {
        return link;
    }
    // This is a relative link, adjust it as necessary.
    //
    // Split off the query/fragment first so that `.md` is only recognized as
    // the extension of the path, and so nothing after it is dropped.
    let path_end = link.find(['?', '#']).unwrap_or(link.len());
    let (path, suffix) = link.split_at(path_end);
    let fixed = path
        .strip_suffix(".md")
        .map(|stem| format!("{stem}.html{suffix}"));
    match fixed {
        Some(fixed) => CowStr::from(fixed),
        None => link,
    }
}
/// Applies [`fix_link`] to the `href` and `xlink:href` attributes of an
/// `<a>` element.
///
/// Any other element is left untouched.
fn fix_html_link(el: &mut Element) {
    if el.name() == "a" {
        for attr in ["href", "xlink:href"] {
            let Some(value) = el.attr(attr) else {
                continue;
            };
            let fixed = fix_link(value.into()).into_tendril();
            el.insert_attr(attr, fixed);
        }
    }
}
/// Reports whether `name` is the tag name of a [void element].
///
/// This is used to know whether or not to expect a `</>` end tag. The
/// comparison is exact, so `name` must match the lowercase spelling.
///
/// [void element]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
pub(crate) fn is_void_element(name: &str) -> bool {
    const VOID_ELEMENTS: [&str; 14] = [
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
        "source", "track", "wbr",
    ];
    VOID_ELEMENTS.contains(&name)
}