Introduce options struct for markdown rendering

This adds `MarkdownOptions` for creating the pulldown-cmark parser, and
`HtmlRenderOptions` for converting markdown to HTML. These types should
make it easier to extend the rendering options while remaining
semver compatible. They should also improve the general ergonomics of
using these functions.
This commit is contained in:
Eric Huss 2025-08-22 16:17:41 -07:00
parent 0722d81295
commit f4012757a7
6 changed files with 115 additions and 60 deletions

View file

@ -8,7 +8,7 @@ use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils;
use mdbook_core::utils::fs::get_404_output_file;
use mdbook_markdown::{render_markdown, render_markdown_with_path};
use mdbook_markdown::render_markdown;
use mdbook_renderer::{RenderContext, Renderer};
use regex::{Captures, Regex};
use serde_json::json;
@ -56,10 +56,10 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}
let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation);
let fixed_content =
render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation, Some(path));
let mut options = crate::html_render_options_from_config(path, &ctx.html_config);
let content = render_markdown(&ch.content, &options);
options.for_print = true;
let fixed_content = render_markdown(&ch.content, &options);
if prev_ch.is_some() && ctx.html_config.print.page_break {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@ -193,7 +193,8 @@ impl HtmlHandlebars {
.to_string()
}
};
let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation);
let options = crate::html_render_options_from_config(Path::new("404.md"), html_config);
let html_content_404 = render_markdown(&content_404, &options);
let mut data_404 = data.clone();
let base_url = if let Some(site_url) = &html_config.site_url {

View file

@ -6,6 +6,7 @@ use log::{debug, warn};
use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{Search, SearchChapterSettings};
use mdbook_core::utils;
use mdbook_markdown::HtmlRenderOptions;
use mdbook_markdown::new_cmark_parser;
use pulldown_cmark::*;
use serde::Serialize;
@ -133,7 +134,8 @@ fn render_item(
.with_context(|| "Could not convert HTML path to str")?;
let anchor_base = utils::fs::normalize_path(filepath);
let mut p = new_cmark_parser(&chapter.content, false).peekable();
let options = HtmlRenderOptions::new(&chapter_path);
let mut p = new_cmark_parser(&chapter.content, &options.markdown_options).peekable();
let mut in_heading = false;
let max_section_depth = u32::from(search_config.heading_split_level);

View file

@ -4,3 +4,16 @@ mod html_handlebars;
pub mod theme;
pub use html_handlebars::HtmlHandlebars;
use mdbook_core::config::HtmlConfig;
use mdbook_markdown::HtmlRenderOptions;
use std::path::Path;
/// Builds the [`HtmlRenderOptions`] for the chapter at `path`, carrying over
/// the markdown settings from the HTML `config`.
///
/// Only `smart_punctuation` is copied from the config; every other option
/// keeps the value assigned by [`HtmlRenderOptions::new`].
pub fn html_render_options_from_config<'a>(
    path: &'a Path,
    config: &HtmlConfig,
) -> HtmlRenderOptions<'a> {
    let smart_punctuation = config.smart_punctuation;
    let mut render_options = HtmlRenderOptions::new(path);
    render_options.markdown_options.smart_punctuation = smart_punctuation;
    render_options
}

View file

@ -11,7 +11,6 @@
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
@ -23,35 +22,55 @@ pub use pulldown_cmark;
#[cfg(test)]
mod tests;
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
render_markdown_with_path(text, smart_punctuation, None)
/// Options for parsing markdown.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal; start from [`Default::default`] and assign the
/// fields that should differ.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct MarkdownOptions {
    /// Enables smart punctuation.
    ///
    /// Converts quotes to curly quotes, `...` to `…`, `--` to en-dash, and
    /// `---` to em-dash.
    pub smart_punctuation: bool,
}

/// Options for converting markdown to HTML.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct HtmlRenderOptions<'a> {
    /// Options for parsing markdown.
    pub markdown_options: MarkdownOptions,
    /// The chapter's location, relative to the `SUMMARY.md` file.
    ///
    /// Also used to identify the chapter in footnote warnings.
    pub path: &'a Path,
    /// If true, render for the print page.
    ///
    /// When set, fragment-only and relative link destinations are rewritten
    /// using `path` so that they resolve from the print page.
    pub for_print: bool,
}
impl<'a> HtmlRenderOptions<'a> {
    /// Creates a new [`HtmlRenderOptions`] for the chapter at `path`.
    ///
    /// The markdown options start at their defaults and `for_print` starts
    /// as `false`; adjust the public fields afterwards as needed.
    pub fn new(path: &'a Path) -> HtmlRenderOptions<'a> {
        let markdown_options = MarkdownOptions::default();
        Self {
            markdown_options,
            path,
            for_print: false,
        }
    }
}
/// Creates a new pulldown-cmark parser of the given text.
pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
pub fn new_cmark_parser<'text>(text: &'text str, options: &MarkdownOptions) -> Parser<'text> {
let mut opts = Options::empty();
opts.insert(Options::ENABLE_TABLES);
opts.insert(Options::ENABLE_FOOTNOTES);
opts.insert(Options::ENABLE_STRIKETHROUGH);
opts.insert(Options::ENABLE_TASKLISTS);
opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
if smart_punctuation {
if options.smart_punctuation {
opts.insert(Options::ENABLE_SMART_PUNCTUATION);
}
Parser::new_ext(text, opts)
}
/// Renders markdown to HTML.
///
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
pub fn render_markdown_with_path(
text: &str,
smart_punctuation: bool,
path: Option<&Path>,
) -> String {
pub fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
let mut body = String::with_capacity(text.len() * 3 / 2);
// Based on
@ -84,9 +103,9 @@ pub fn render_markdown_with_path(
// to figure out a way to do this just with pure CSS.
let mut prev_was_footnote = false;
let events = new_cmark_parser(text, smart_punctuation)
let events = new_cmark_parser(text, &options.markdown_options)
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, path))
.map(|event| adjust_links(event, options))
.flat_map(|event| {
let (a, b) = wrap_tables(event);
a.into_iter().chain(b)
@ -98,7 +117,10 @@ pub fn render_markdown_with_path(
Event::Start(Tag::FootnoteDefinition(name)) => {
prev_was_footnote = false;
if !in_footnote.is_empty() {
log::warn!("internal bug: nested footnote not expected in {path:?}");
log::warn!(
"internal bug: nested footnote not expected in {:?}",
options.path
);
}
in_footnote_name = special_escape(&name);
None
@ -111,7 +133,7 @@ pub fn render_markdown_with_path(
log::warn!(
"footnote `{name}` in {} defined multiple times - \
not updating to new definition",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
options.path.display()
);
} else {
footnote_defs.insert(name, def_events);
@ -162,7 +184,7 @@ pub fn render_markdown_with_path(
if !footnote_defs.is_empty() {
add_footnote_defs(
&mut body,
path,
options,
footnote_defs.into_iter().collect(),
&footnote_numbers,
);
@ -174,7 +196,7 @@ pub fn render_markdown_with_path(
/// Adds all footnote definitions into `body`.
fn add_footnote_defs(
body: &mut String,
path: Option<&Path>,
options: &HtmlRenderOptions<'_>,
mut defs: Vec<(String, Vec<Event<'_>>)>,
numbers: &HashMap<String, (usize, u32)>,
) {
@ -183,7 +205,7 @@ fn add_footnote_defs(
if !numbers.contains_key(name) {
log::warn!(
"footnote `{name}` in `{}` is defined but not referenced",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
options.path.display()
);
false
} else {
@ -270,17 +292,17 @@ fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
/// page go to the original location. Normal page rendering leaves
/// `options.for_print` false. Ideally, print page links would link to anchors
/// on the print page, but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
fn adjust_links<'a>(event: Event<'a>, options: &HtmlRenderOptions<'_>) -> Event<'a> {
static SCHEME_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
static MD_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix<'a>(dest: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if options.for_print {
let mut base = options.path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
@ -293,8 +315,9 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
if options.for_print {
let base = options
.path
.parent()
.expect("path can't be empty")
.to_str()
@ -318,7 +341,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
dest
}
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix_html<'a>(html: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
@ -332,7 +355,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
let fixed = fix(caps[2].into(), options);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
@ -347,7 +370,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Link {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, options),
title,
id,
}),
@ -358,12 +381,12 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Image {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, options),
title,
id,
}),
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
Event::Html(html) => Event::Html(fix_html(html, options)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, options)),
_ => event,
}
}

View file

@ -16,26 +16,28 @@ fn escaped_special() {
#[test]
fn preserves_external_links() {
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(
render_markdown("[example](https://www.rust-lang.org/)", false),
render_markdown("[example](https://www.rust-lang.org/)", &options),
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
);
}
#[test]
fn it_can_adjust_markdown_links() {
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(
render_markdown("[example](example.md)", false),
render_markdown("[example](example.md)", &options),
"<p><a href=\"example.html\">example</a></p>\n"
);
assert_eq!(
render_markdown("[example_anchor](example.md#anchor)", false),
render_markdown("[example_anchor](example.md#anchor)", &options),
"<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
);
// this anchor contains 'md' inside of it
assert_eq!(
render_markdown("[phantom data](foo.html#phantomdata)", false),
render_markdown("[phantom data](foo.html#phantomdata)", &options),
"<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
);
}
@ -53,12 +55,14 @@ fn it_can_wrap_tables() {
</tbody></table>
</div>
"#.trim();
assert_eq!(render_markdown(src, false), out);
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(src, &options), out);
}
#[test]
fn it_can_keep_quotes_straight() {
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown("'one'", &options), "<p>'one'</p>\n");
}
#[test]
@ -74,7 +78,9 @@ fn it_can_make_quotes_curly_except_when_they_are_in_code() {
</code></pre>
<p><code>'three'</code> four</p>
"#;
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -96,8 +102,10 @@ more text with spaces
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -109,8 +117,10 @@ fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -122,8 +132,10 @@ fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_clas
let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -135,13 +147,17 @@ fn rust_code_block_without_properties_has_proper_html_class() {
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}

View file

@ -22,8 +22,8 @@ fn footnotes() {
cmd.expect_stderr(str![[r#"
[TIMESTAMP] [INFO] (mdbook_driver::mdbook): Book building has started
[TIMESTAMP] [INFO] (mdbook_driver::mdbook): Running the html backend
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `<unknown>` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [INFO] (mdbook_html::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book`