Merge pull request #2809 from ehuss/markdown-options

Introduce options struct for markdown rendering
This commit is contained in:
Eric Huss 2025-08-22 23:23:58 +00:00 committed by GitHub
commit 6be8e526d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 115 additions and 60 deletions

View file

@ -8,7 +8,7 @@ use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils;
use mdbook_core::utils::fs::get_404_output_file;
use mdbook_markdown::{render_markdown, render_markdown_with_path};
use mdbook_markdown::render_markdown;
use mdbook_renderer::{RenderContext, Renderer};
use regex::{Captures, Regex};
use serde_json::json;
@ -56,10 +56,10 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}
let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation);
let fixed_content =
render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation, Some(path));
let mut options = crate::html_render_options_from_config(path, &ctx.html_config);
let content = render_markdown(&ch.content, &options);
options.for_print = true;
let fixed_content = render_markdown(&ch.content, &options);
if prev_ch.is_some() && ctx.html_config.print.page_break {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@ -193,7 +193,8 @@ impl HtmlHandlebars {
.to_string()
}
};
let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation);
let options = crate::html_render_options_from_config(Path::new("404.md"), html_config);
let html_content_404 = render_markdown(&content_404, &options);
let mut data_404 = data.clone();
let base_url = if let Some(site_url) = &html_config.site_url {

View file

@ -6,6 +6,7 @@ use log::{debug, warn};
use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{Search, SearchChapterSettings};
use mdbook_core::utils;
use mdbook_markdown::HtmlRenderOptions;
use mdbook_markdown::new_cmark_parser;
use pulldown_cmark::*;
use serde::Serialize;
@ -133,7 +134,8 @@ fn render_item(
.with_context(|| "Could not convert HTML path to str")?;
let anchor_base = utils::fs::normalize_path(filepath);
let mut p = new_cmark_parser(&chapter.content, false).peekable();
let options = HtmlRenderOptions::new(&chapter_path);
let mut p = new_cmark_parser(&chapter.content, &options.markdown_options).peekable();
let mut in_heading = false;
let max_section_depth = u32::from(search_config.heading_split_level);

View file

@ -4,3 +4,16 @@ mod html_handlebars;
pub mod theme;
pub use html_handlebars::HtmlHandlebars;
use mdbook_core::config::HtmlConfig;
use mdbook_markdown::HtmlRenderOptions;
use std::path::Path;
/// Builds the [`HtmlRenderOptions`] used to render the chapter at `path`.
///
/// Starts from [`HtmlRenderOptions::new`] and copies the
/// `smart_punctuation` setting from `config` into the markdown options.
/// Every other option keeps the value chosen by `new`.
pub fn html_render_options_from_config<'a>(
    path: &'a Path,
    config: &HtmlConfig,
) -> HtmlRenderOptions<'a> {
    let mut render_options = HtmlRenderOptions::new(path);
    render_options.markdown_options.smart_punctuation = config.smart_punctuation;
    render_options
}

View file

@ -11,7 +11,6 @@
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
@ -23,35 +22,55 @@ pub use pulldown_cmark;
#[cfg(test)]
mod tests;
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
render_markdown_with_path(text, smart_punctuation, None)
/// Options for parsing markdown.
///
/// The [`Default`] value leaves every optional feature disabled.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct MarkdownOptions {
    /// Enables smart punctuation.
    ///
    /// Converts quotes to curly quotes, `...` to `…`, `--` to en-dash, and
    /// `---` to em-dash.
    pub smart_punctuation: bool,
}

/// Options for converting markdown to HTML.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct HtmlRenderOptions<'a> {
    /// Options for parsing markdown.
    pub markdown_options: MarkdownOptions,
    /// The chapter's location, relative to the `SUMMARY.md` file.
    pub path: &'a Path,
    /// If true, render for the print page.
    pub for_print: bool,
}

impl<'a> HtmlRenderOptions<'a> {
    /// Creates a new [`HtmlRenderOptions`] for the chapter at `path`.
    ///
    /// The markdown options start at their [`Default`] values and
    /// `for_print` starts as `false`. Because the struct is
    /// `#[non_exhaustive]`, code outside this crate cannot use a struct
    /// literal; it must call this constructor and then assign the public
    /// fields it wants to change.
    pub fn new(path: &'a Path) -> Self {
        Self {
            markdown_options: MarkdownOptions::default(),
            path,
            for_print: false,
        }
    }
}
/// Creates a new pulldown-cmark parser of the given text.
pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
pub fn new_cmark_parser<'text>(text: &'text str, options: &MarkdownOptions) -> Parser<'text> {
let mut opts = Options::empty();
opts.insert(Options::ENABLE_TABLES);
opts.insert(Options::ENABLE_FOOTNOTES);
opts.insert(Options::ENABLE_STRIKETHROUGH);
opts.insert(Options::ENABLE_TASKLISTS);
opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
if smart_punctuation {
if options.smart_punctuation {
opts.insert(Options::ENABLE_SMART_PUNCTUATION);
}
Parser::new_ext(text, opts)
}
/// Renders markdown to HTML.
///
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
pub fn render_markdown_with_path(
text: &str,
smart_punctuation: bool,
path: Option<&Path>,
) -> String {
pub fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
let mut body = String::with_capacity(text.len() * 3 / 2);
// Based on
@ -84,9 +103,9 @@ pub fn render_markdown_with_path(
// to figure out a way to do this just with pure CSS.
let mut prev_was_footnote = false;
let events = new_cmark_parser(text, smart_punctuation)
let events = new_cmark_parser(text, &options.markdown_options)
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, path))
.map(|event| adjust_links(event, options))
.flat_map(|event| {
let (a, b) = wrap_tables(event);
a.into_iter().chain(b)
@ -98,7 +117,10 @@ pub fn render_markdown_with_path(
Event::Start(Tag::FootnoteDefinition(name)) => {
prev_was_footnote = false;
if !in_footnote.is_empty() {
log::warn!("internal bug: nested footnote not expected in {path:?}");
log::warn!(
"internal bug: nested footnote not expected in {:?}",
options.path
);
}
in_footnote_name = special_escape(&name);
None
@ -111,7 +133,7 @@ pub fn render_markdown_with_path(
log::warn!(
"footnote `{name}` in {} defined multiple times - \
not updating to new definition",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
options.path.display()
);
} else {
footnote_defs.insert(name, def_events);
@ -162,7 +184,7 @@ pub fn render_markdown_with_path(
if !footnote_defs.is_empty() {
add_footnote_defs(
&mut body,
path,
options,
footnote_defs.into_iter().collect(),
&footnote_numbers,
);
@ -174,7 +196,7 @@ pub fn render_markdown_with_path(
/// Adds all footnote definitions into `body`.
fn add_footnote_defs(
body: &mut String,
path: Option<&Path>,
options: &HtmlRenderOptions<'_>,
mut defs: Vec<(String, Vec<Event<'_>>)>,
numbers: &HashMap<String, (usize, u32)>,
) {
@ -183,7 +205,7 @@ fn add_footnote_defs(
if !numbers.contains_key(name) {
log::warn!(
"footnote `{name}` in `{}` is defined but not referenced",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
options.path.display()
);
false
} else {
@ -270,17 +292,17 @@ fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
/// page go to the original location. Normal page rendering leaves
/// `options.for_print` set to `false`. Ideally, print page links would link
/// to anchors on the print page, but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
fn adjust_links<'a>(event: Event<'a>, options: &HtmlRenderOptions<'_>) -> Event<'a> {
static SCHEME_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
static MD_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix<'a>(dest: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if options.for_print {
let mut base = options.path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
@ -293,8 +315,9 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
if options.for_print {
let base = options
.path
.parent()
.expect("path can't be empty")
.to_str()
@ -318,7 +341,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
dest
}
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix_html<'a>(html: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
@ -332,7 +355,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
let fixed = fix(caps[2].into(), options);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
@ -347,7 +370,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Link {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, options),
title,
id,
}),
@ -358,12 +381,12 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Image {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, options),
title,
id,
}),
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
Event::Html(html) => Event::Html(fix_html(html, options)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, options)),
_ => event,
}
}

View file

@ -16,26 +16,28 @@ fn escaped_special() {
#[test]
fn preserves_external_links() {
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(
render_markdown("[example](https://www.rust-lang.org/)", false),
render_markdown("[example](https://www.rust-lang.org/)", &options),
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
);
}
#[test]
fn it_can_adjust_markdown_links() {
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(
render_markdown("[example](example.md)", false),
render_markdown("[example](example.md)", &options),
"<p><a href=\"example.html\">example</a></p>\n"
);
assert_eq!(
render_markdown("[example_anchor](example.md#anchor)", false),
render_markdown("[example_anchor](example.md#anchor)", &options),
"<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
);
// this anchor contains 'md' inside of it
assert_eq!(
render_markdown("[phantom data](foo.html#phantomdata)", false),
render_markdown("[phantom data](foo.html#phantomdata)", &options),
"<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
);
}
@ -53,12 +55,14 @@ fn it_can_wrap_tables() {
</tbody></table>
</div>
"#.trim();
assert_eq!(render_markdown(src, false), out);
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(src, &options), out);
}
#[test]
fn it_can_keep_quotes_straight() {
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
let options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown("'one'", &options), "<p>'one'</p>\n");
}
#[test]
@ -74,7 +78,9 @@ fn it_can_make_quotes_curly_except_when_they_are_in_code() {
</code></pre>
<p><code>'three'</code> four</p>
"#;
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -96,8 +102,10 @@ more text with spaces
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -109,8 +117,10 @@ fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -122,8 +132,10 @@ fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_clas
let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}
#[test]
@ -135,13 +147,17 @@ fn rust_code_block_without_properties_has_proper_html_class() {
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let mut options = HtmlRenderOptions::new(&Path::new(""));
assert_eq!(render_markdown(input, &options), expected);
options.markdown_options.smart_punctuation = true;
assert_eq!(render_markdown(input, &options), expected);
}

View file

@ -22,8 +22,8 @@ fn footnotes() {
cmd.expect_stderr(str![[r#"
[TIMESTAMP] [INFO] (mdbook_driver::mdbook): Book building has started
[TIMESTAMP] [INFO] (mdbook_driver::mdbook): Running the html backend
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `<unknown>` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [INFO] (mdbook_html::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book`