From f4012757a70ac99c909ccbd1e60cf1f1240a38ae Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Fri, 22 Aug 2025 16:17:41 -0700 Subject: [PATCH] Introduce options struct for markdown rendering This adds `MarkdownOptions` for creating the pulldown-cmark parser, and `HtmlRenderOptions` for converting markdown to HTML. These types should help make it easier to extend the rendering options while remaining semver compatible. It should also help with just general ergonomics of using these functions. --- .../src/html_handlebars/hbs_renderer.rs | 13 +-- .../mdbook-html/src/html_handlebars/search.rs | 4 +- crates/mdbook-html/src/lib.rs | 13 +++ crates/mdbook-markdown/src/lib.rs | 91 ++++++++++++------- crates/mdbook-markdown/src/tests.rs | 50 ++++++---- tests/testsuite/markdown.rs | 4 +- 6 files changed, 115 insertions(+), 60 deletions(-) diff --git a/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs b/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs index 73b2a141..e703752a 100644 --- a/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs +++ b/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs @@ -8,7 +8,7 @@ use mdbook_core::book::{Book, BookItem, Chapter}; use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition}; use mdbook_core::utils; use mdbook_core::utils::fs::get_404_output_file; -use mdbook_markdown::{render_markdown, render_markdown_with_path}; +use mdbook_markdown::render_markdown; use mdbook_renderer::{RenderContext, Renderer}; use regex::{Captures, Regex}; use serde_json::json; @@ -56,10 +56,10 @@ impl HtmlHandlebars { .insert("git_repository_edit_url".to_owned(), json!(edit_url)); } - let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation); - - let fixed_content = - render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation, Some(path)); + let mut options = crate::html_render_options_from_config(path, &ctx.html_config); + let content = render_markdown(&ch.content, &options); + 
options.for_print = true; + let fixed_content = render_markdown(&ch.content, &options); if prev_ch.is_some() && ctx.html_config.print.page_break { // Add page break between chapters // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before @@ -193,7 +193,8 @@ impl HtmlHandlebars { .to_string() } }; - let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation); + let options = crate::html_render_options_from_config(Path::new("404.md"), html_config); + let html_content_404 = render_markdown(&content_404, &options); let mut data_404 = data.clone(); let base_url = if let Some(site_url) = &html_config.site_url { diff --git a/crates/mdbook-html/src/html_handlebars/search.rs b/crates/mdbook-html/src/html_handlebars/search.rs index 25f19d26..1a235208 100644 --- a/crates/mdbook-html/src/html_handlebars/search.rs +++ b/crates/mdbook-html/src/html_handlebars/search.rs @@ -6,6 +6,7 @@ use log::{debug, warn}; use mdbook_core::book::{Book, BookItem, Chapter}; use mdbook_core::config::{Search, SearchChapterSettings}; use mdbook_core::utils; +use mdbook_markdown::HtmlRenderOptions; use mdbook_markdown::new_cmark_parser; use pulldown_cmark::*; use serde::Serialize; @@ -133,7 +134,8 @@ fn render_item( .with_context(|| "Could not convert HTML path to str")?; let anchor_base = utils::fs::normalize_path(filepath); - let mut p = new_cmark_parser(&chapter.content, false).peekable(); + let options = HtmlRenderOptions::new(&chapter_path); + let mut p = new_cmark_parser(&chapter.content, &options.markdown_options).peekable(); let mut in_heading = false; let max_section_depth = u32::from(search_config.heading_split_level); diff --git a/crates/mdbook-html/src/lib.rs b/crates/mdbook-html/src/lib.rs index 62b392a5..d3025e42 100644 --- a/crates/mdbook-html/src/lib.rs +++ b/crates/mdbook-html/src/lib.rs @@ -4,3 +4,16 @@ mod html_handlebars; pub mod theme; pub use 
html_handlebars::HtmlHandlebars; +use mdbook_core::config::HtmlConfig; +use mdbook_markdown::HtmlRenderOptions; +use std::path::Path; + +/// Creates an [`HtmlRenderOptions`] from the given config. +pub fn html_render_options_from_config<'a>( + path: &'a Path, + config: &HtmlConfig, +) -> HtmlRenderOptions<'a> { + let mut options = HtmlRenderOptions::new(path); + options.markdown_options.smart_punctuation = config.smart_punctuation; + options +} diff --git a/crates/mdbook-markdown/src/lib.rs b/crates/mdbook-markdown/src/lib.rs index b45d06b0..1925d9fa 100644 --- a/crates/mdbook-markdown/src/lib.rs +++ b/crates/mdbook-markdown/src/lib.rs @@ -11,7 +11,6 @@ use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html}; use regex::Regex; -use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Write; use std::path::Path; @@ -23,35 +22,55 @@ pub use pulldown_cmark; #[cfg(test)] mod tests; -/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. -pub fn render_markdown(text: &str, smart_punctuation: bool) -> String { - render_markdown_with_path(text, smart_punctuation, None) +/// Options for parsing markdown. +#[derive(Default)] +#[non_exhaustive] +pub struct MarkdownOptions { + /// Enables smart punctuation. + /// + /// Converts quotes to curly quotes, `...` to `…`, `--` to en-dash, and + /// `---` to em-dash. + pub smart_punctuation: bool, +} + +/// Options for converting markdown to HTML. +#[non_exhaustive] +pub struct HtmlRenderOptions<'a> { + /// Options for parsing markdown. + pub markdown_options: MarkdownOptions, + /// The chapter's location, relative to the `SUMMARY.md` file. + pub path: &'a Path, + /// If true, render for the print page. + pub for_print: bool, +} + +impl<'a> HtmlRenderOptions<'a> { + /// Creates a new [`HtmlRenderOptions`]. 
+ pub fn new(path: &'a Path) -> HtmlRenderOptions<'a> { + HtmlRenderOptions { + markdown_options: MarkdownOptions::default(), + path, + for_print: false, + } + } } /// Creates a new pulldown-cmark parser of the given text. -pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> { +pub fn new_cmark_parser<'text>(text: &'text str, options: &MarkdownOptions) -> Parser<'text> { let mut opts = Options::empty(); opts.insert(Options::ENABLE_TABLES); opts.insert(Options::ENABLE_FOOTNOTES); opts.insert(Options::ENABLE_STRIKETHROUGH); opts.insert(Options::ENABLE_TASKLISTS); opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); - if smart_punctuation { + if options.smart_punctuation { opts.insert(Options::ENABLE_SMART_PUNCTUATION); } Parser::new_ext(text, opts) } /// Renders markdown to HTML. -/// -/// `path` should only be set if this is being generated for the consolidated -/// print page. It should point to the page being rendered relative to the -/// root of the book. -pub fn render_markdown_with_path( - text: &str, - smart_punctuation: bool, - path: Option<&Path>, -) -> String { +pub fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String { let mut body = String::with_capacity(text.len() * 3 / 2); // Based on @@ -84,9 +103,9 @@ pub fn render_markdown_with_path( // to figure out a way to do this just with pure CSS. 
let mut prev_was_footnote = false; - let events = new_cmark_parser(text, smart_punctuation) + let events = new_cmark_parser(text, &options.markdown_options) .map(clean_codeblock_headers) - .map(|event| adjust_links(event, path)) + .map(|event| adjust_links(event, options)) .flat_map(|event| { let (a, b) = wrap_tables(event); a.into_iter().chain(b) @@ -98,7 +117,10 @@ pub fn render_markdown_with_path( Event::Start(Tag::FootnoteDefinition(name)) => { prev_was_footnote = false; if !in_footnote.is_empty() { - log::warn!("internal bug: nested footnote not expected in {path:?}"); + log::warn!( + "internal bug: nested footnote not expected in {:?}", + options.path + ); } in_footnote_name = special_escape(&name); None @@ -111,7 +133,7 @@ pub fn render_markdown_with_path( log::warn!( "footnote `{name}` in {} defined multiple times - \ not updating to new definition", - path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) + options.path.display() ); } else { footnote_defs.insert(name, def_events); @@ -162,7 +184,7 @@ pub fn render_markdown_with_path( if !footnote_defs.is_empty() { add_footnote_defs( &mut body, - path, + options, footnote_defs.into_iter().collect(), &footnote_numbers, ); @@ -174,7 +196,7 @@ pub fn render_markdown_with_path( /// Adds all footnote definitions into `body`. fn add_footnote_defs( body: &mut String, - path: Option<&Path>, + options: &HtmlRenderOptions<'_>, mut defs: Vec<(String, Vec>)>, numbers: &HashMap, ) { @@ -183,7 +205,7 @@ fn add_footnote_defs( if !numbers.contains_key(name) { log::warn!( "footnote `{name}` in `{}` is defined but not referenced", - path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) + options.path.display() ); false } else { @@ -270,17 +292,17 @@ fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { /// page go to the original location. Normal page rendering sets `path` to /// None. Ideally, print page links would link to anchors on the print page, /// but that is very difficult. 
-fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { +fn adjust_links<'a>(event: Event<'a>, options: &HtmlRenderOptions<'_>) -> Event<'a> { static SCHEME_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap()); static MD_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap()); - fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + fn fix<'a>(dest: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> { if dest.starts_with('#') { // Fragment-only link. - if let Some(path) = path { - let mut base = path.display().to_string(); + if options.for_print { + let mut base = options.path.display().to_string(); if base.ends_with(".md") { base.replace_range(base.len() - 3.., ".html"); } @@ -293,8 +315,9 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { if !SCHEME_LINK.is_match(&dest) { // This is a relative link, adjust it as necessary. let mut fixed_link = String::new(); - if let Some(path) = path { - let base = path + if options.for_print { + let base = options + .path .parent() .expect("path can't be empty") .to_str() @@ -318,7 +341,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { dest } - fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + fn fix_html<'a>(html: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> { // This is a terrible hack, but should be reasonably reliable. Nobody // should ever parse a tag with a regex. 
However, there isn't anything // in Rust that I know of that is suitable for handling partial html @@ -332,7 +355,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { HTML_LINK .replace_all(&html, |caps: &regex::Captures<'_>| { - let fixed = fix(caps[2].into(), path); + let fixed = fix(caps[2].into(), options); format!("{}{}\"", &caps[1], fixed) }) .into_owned() @@ -347,7 +370,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { id, }) => Event::Start(Tag::Link { link_type, - dest_url: fix(dest_url, path), + dest_url: fix(dest_url, options), title, id, }), @@ -358,12 +381,12 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { id, }) => Event::Start(Tag::Image { link_type, - dest_url: fix(dest_url, path), + dest_url: fix(dest_url, options), title, id, }), - Event::Html(html) => Event::Html(fix_html(html, path)), - Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)), + Event::Html(html) => Event::Html(fix_html(html, options)), + Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, options)), _ => event, } } diff --git a/crates/mdbook-markdown/src/tests.rs b/crates/mdbook-markdown/src/tests.rs index 7f2aea27..64f3000a 100644 --- a/crates/mdbook-markdown/src/tests.rs +++ b/crates/mdbook-markdown/src/tests.rs @@ -16,26 +16,28 @@ fn escaped_special() { #[test] fn preserves_external_links() { + let options = HtmlRenderOptions::new(&Path::new("")); assert_eq!( - render_markdown("[example](https://www.rust-lang.org/)", false), + render_markdown("[example](https://www.rust-lang.org/)", &options), "

example

\n" ); } #[test] fn it_can_adjust_markdown_links() { + let options = HtmlRenderOptions::new(&Path::new("")); assert_eq!( - render_markdown("[example](example.md)", false), + render_markdown("[example](example.md)", &options), "

example

\n" ); assert_eq!( - render_markdown("[example_anchor](example.md#anchor)", false), + render_markdown("[example_anchor](example.md#anchor)", &options), "

example_anchor

\n" ); // this anchor contains 'md' inside of it assert_eq!( - render_markdown("[phantom data](foo.html#phantomdata)", false), + render_markdown("[phantom data](foo.html#phantomdata)", &options), "

phantom data

\n" ); } @@ -53,12 +55,14 @@ fn it_can_wrap_tables() { "#.trim(); - assert_eq!(render_markdown(src, false), out); + let options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(src, &options), out); } #[test] fn it_can_keep_quotes_straight() { - assert_eq!(render_markdown("'one'", false), "

'one'

\n"); + let options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown("'one'", &options), "

'one'

\n"); } #[test] @@ -74,7 +78,9 @@ fn it_can_make_quotes_curly_except_when_they_are_in_code() {

'three' ‘four’

"#; - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); } #[test] @@ -96,8 +102,10 @@ more text with spaces

more text with spaces

"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(input, &options), expected); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); } #[test] @@ -109,8 +117,10 @@ fn rust_code_block_properties_are_passed_as_space_delimited_class() { let expected = r#"
"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(input, &options), expected); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); } #[test] @@ -122,8 +132,10 @@ fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_clas let expected = r#"
"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(input, &options), expected); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); } #[test] @@ -135,13 +147,17 @@ fn rust_code_block_without_properties_has_proper_html_class() { let expected = r#"
"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(input, &options), expected); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); let input = r#" ```rust ``` "#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); + let mut options = HtmlRenderOptions::new(&Path::new("")); + assert_eq!(render_markdown(input, &options), expected); + options.markdown_options.smart_punctuation = true; + assert_eq!(render_markdown(input, &options), expected); } diff --git a/tests/testsuite/markdown.rs b/tests/testsuite/markdown.rs index e8366a48..9e73f295 100644 --- a/tests/testsuite/markdown.rs +++ b/tests/testsuite/markdown.rs @@ -22,8 +22,8 @@ fn footnotes() { cmd.expect_stderr(str![[r#" [TIMESTAMP] [INFO] (mdbook_driver::mdbook): Book building has started [TIMESTAMP] [INFO] (mdbook_driver::mdbook): Running the html backend -[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in defined multiple times - not updating to new definition -[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `` is defined but not referenced +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced [TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition [TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced [TIMESTAMP] [INFO] (mdbook_html::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book`