From 8f3b6b4776f641a6f5a6479e47b8611d8acae73b Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Mon, 21 Jul 2025 15:46:36 -0700 Subject: [PATCH] Move markdown support to mdbook-markdown This moves all the code responsible for markdown processing to the mdbook-markdown crate. --- Cargo.lock | 10 + Cargo.toml | 2 + crates/mdbook-core/src/utils/mod.rs | 507 +------------------ crates/mdbook-markdown/Cargo.toml | 16 + crates/mdbook-markdown/src/lib.rs | 367 ++++++++++++++ crates/mdbook-markdown/src/tests.rs | 147 ++++++ src/renderer/html_handlebars/hbs_renderer.rs | 14 +- src/renderer/html_handlebars/helpers/toc.rs | 3 +- src/renderer/html_handlebars/search.rs | 3 +- tests/testsuite/markdown.rs | 8 +- 10 files changed, 556 insertions(+), 521 deletions(-) create mode 100644 crates/mdbook-markdown/Cargo.toml create mode 100644 crates/mdbook-markdown/src/lib.rs create mode 100644 crates/mdbook-markdown/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 5c4626ad..2a5589f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1271,6 +1271,7 @@ dependencies = [ "ignore", "log", "mdbook-core", + "mdbook-markdown", "mdbook-preprocessor", "mdbook-renderer", "mdbook-summary", @@ -1311,6 +1312,15 @@ dependencies = [ "toml", ] +[[package]] +name = "mdbook-markdown" +version = "0.5.0-alpha.1" +dependencies = [ + "log", + "pulldown-cmark 0.10.3", + "regex", +] + [[package]] name = "mdbook-preprocessor" version = "0.5.0-alpha.1" diff --git a/Cargo.toml b/Cargo.toml index 2c3cf492..b4cc16fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow anyhow = "1.0.98" log = "0.4.27" mdbook-core = { path = "crates/mdbook-core" } +mdbook-markdown = { path = "crates/mdbook-markdown" } mdbook-preprocessor = { path = "crates/mdbook-preprocessor" } mdbook-renderer = { path = "crates/mdbook-renderer" } mdbook-summary = { path = "crates/mdbook-summary" } @@ -63,6 +64,7 @@ handlebars = "6.0" hex = "0.4.3" log.workspace = 
true mdbook-core.workspace = true +mdbook-markdown.workspace = true mdbook-preprocessor.workspace = true mdbook-renderer.workspace = true mdbook-summary.workspace = true diff --git a/crates/mdbook-core/src/utils/mod.rs b/crates/mdbook-core/src/utils/mod.rs index 56bfbb97..caca5dcd 100644 --- a/crates/mdbook-core/src/utils/mod.rs +++ b/crates/mdbook-core/src/utils/mod.rs @@ -2,12 +2,9 @@ use anyhow::Error; use log::error; -use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html}; use regex::Regex; use std::borrow::Cow; use std::collections::HashMap; -use std::fmt::Write; -use std::path::Path; use std::sync::LazyLock; pub mod fs; @@ -83,338 +80,6 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap(event: Event<'a>, path: Option<&Path>) -> Event<'a> { - static SCHEME_LINK: LazyLock = - LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap()); - static MD_LINK: LazyLock = - LazyLock::new(|| Regex::new(r"(?P.*)\.md(?P#.*)?").unwrap()); - - fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { - if dest.starts_with('#') { - // Fragment-only link. - if let Some(path) = path { - let mut base = path.display().to_string(); - if base.ends_with(".md") { - base.replace_range(base.len() - 3.., ".html"); - } - return format!("{base}{dest}").into(); - } else { - return dest; - } - } - // Don't modify links with schemes like `https`. - if !SCHEME_LINK.is_match(&dest) { - // This is a relative link, adjust it as necessary. 
- let mut fixed_link = String::new(); - if let Some(path) = path { - let base = path - .parent() - .expect("path can't be empty") - .to_str() - .expect("utf-8 paths only"); - if !base.is_empty() { - write!(fixed_link, "{base}/").unwrap(); - } - } - - if let Some(caps) = MD_LINK.captures(&dest) { - fixed_link.push_str(&caps["link"]); - fixed_link.push_str(".html"); - if let Some(anchor) = caps.name("anchor") { - fixed_link.push_str(anchor.as_str()); - } - } else { - fixed_link.push_str(&dest); - }; - return CowStr::from(fixed_link); - } - dest - } - - fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { - // This is a terrible hack, but should be reasonably reliable. Nobody - // should ever parse a tag with a regex. However, there isn't anything - // in Rust that I know of that is suitable for handling partial html - // fragments like those generated by pulldown_cmark. - // - // There are dozens of HTML tags/attributes that contain paths, so - // feel free to add more tags if desired; these are the only ones I - // care about right now. - static HTML_LINK: LazyLock<Regex> = - LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap()); - - HTML_LINK - .replace_all(&html, |caps: &regex::Captures<'_>| { - let fixed = fix(caps[2].into(), path); - format!("{}{}\"", &caps[1], fixed) - }) - .into_owned() - .into() - } - - match event { - Event::Start(Tag::Link { - link_type, - dest_url, - title, - id, - }) => Event::Start(Tag::Link { - link_type, - dest_url: fix(dest_url, path), - title, - id, - }), - Event::Start(Tag::Image { - link_type, - dest_url, - title, - id, - }) => Event::Start(Tag::Image { - link_type, - dest_url: fix(dest_url, path), - title, - id, - }), - Event::Html(html) => Event::Html(fix_html(html, path)), - Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)), - _ => event, - } -} - -/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. 
-pub fn render_markdown(text: &str, smart_punctuation: bool) -> String { - render_markdown_with_path(text, smart_punctuation, None) -} - -/// Creates a new pulldown-cmark parser of the given text. -pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> { - let mut opts = Options::empty(); - opts.insert(Options::ENABLE_TABLES); - opts.insert(Options::ENABLE_FOOTNOTES); - opts.insert(Options::ENABLE_STRIKETHROUGH); - opts.insert(Options::ENABLE_TASKLISTS); - opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); - if smart_punctuation { - opts.insert(Options::ENABLE_SMART_PUNCTUATION); - } - Parser::new_ext(text, opts) -} - -/// Renders markdown to HTML. -/// -/// `path` should only be set if this is being generated for the consolidated -/// print page. It should point to the page being rendered relative to the -/// root of the book. -pub fn render_markdown_with_path( - text: &str, - smart_punctuation: bool, - path: Option<&Path>, -) -> String { - let mut body = String::with_capacity(text.len() * 3 / 2); - - // Based on - // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs - - // This handling of footnotes is a two-pass process. This is done to - // support linkbacks, little arrows that allow you to jump back to the - // footnote reference. The first pass collects the footnote definitions. - // The second pass modifies those definitions to include the linkbacks, - // and inserts the definitions back into the `events` list. - - // This is a map of name -> (number, count) - // `name` is the name of the footnote. - // `number` is the footnote number displayed in the output. - // `count` is the number of references to this footnote (used for multiple - // linkbacks, and checking for unused footnotes). - let mut footnote_numbers = HashMap::new(); - // This is a map of name -> Vec - // `name` is the name of the footnote. - // The events list is the list of events needed to build the footnote definition. 
- let mut footnote_defs = HashMap::new(); - - // The following are used when currently processing a footnote definition. - // - // This is the name of the footnote (escaped). - let mut in_footnote_name = String::new(); - // This is the list of events to build the footnote definition. - let mut in_footnote = Vec::new(); - - let events = new_cmark_parser(text, smart_punctuation) - .map(clean_codeblock_headers) - .map(|event| adjust_links(event, path)) - .flat_map(|event| { - let (a, b) = wrap_tables(event); - a.into_iter().chain(b) - }) - // Footnote rewriting must go last to ensure inner definition contents - // are processed (since they get pulled out of the initial stream). - .filter_map(|event| { - match event { - Event::Start(Tag::FootnoteDefinition(name)) => { - if !in_footnote.is_empty() { - log::warn!("internal bug: nested footnote not expected in {path:?}"); - } - in_footnote_name = special_escape(&name); - None - } - Event::End(TagEnd::FootnoteDefinition) => { - let def_events = std::mem::take(&mut in_footnote); - let name = std::mem::take(&mut in_footnote_name); - - if footnote_defs.contains_key(&name) { - log::warn!( - "footnote `{name}` in {} defined multiple times - \ - not updating to new definition", - path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) - ); - } else { - footnote_defs.insert(name, def_events); - } - None - } - Event::FootnoteReference(name) => { - let name = special_escape(&name); - let len = footnote_numbers.len() + 1; - let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0)); - *count += 1; - let html = Event::Html( - format!( - "\ - {n}\ - " - ) - .into(), - ); - if in_footnote_name.is_empty() { - Some(html) - } else { - // While inside a footnote, we need to accumulate. - in_footnote.push(html); - None - } - } - // While inside a footnote, accumulate all events into a local. 
- _ if !in_footnote_name.is_empty() => { - in_footnote.push(event); - None - } - _ => Some(event), - } - }); - - html::push_html(&mut body, events); - - if !footnote_defs.is_empty() { - add_footnote_defs( - &mut body, - path, - footnote_defs.into_iter().collect(), - &footnote_numbers, - ); - } - - body -} - -/// Adds all footnote definitions into `body`. -fn add_footnote_defs( - body: &mut String, - path: Option<&Path>, - mut defs: Vec<(String, Vec>)>, - numbers: &HashMap, -) { - // Remove unused. - defs.retain(|(name, _)| { - if !numbers.contains_key(name) { - log::warn!( - "footnote `{name}` in `{}` is defined but not referenced", - path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) - ); - false - } else { - true - } - }); - - defs.sort_by_cached_key(|(name, _)| numbers[name].0); - - body.push_str( - "
\n\ -
    ", - ); - - // Insert the backrefs to the definition, and put the definitions in the output. - for (name, mut fn_events) in defs { - let count = numbers[&name].1; - fn_events.insert( - 0, - Event::Html(format!("
  1. ").into()), - ); - // Generate the linkbacks. - for usage in 1..=count { - let nth = if usage == 1 { - String::new() - } else { - usage.to_string() - }; - let backlink = - Event::Html(format!(" ↩{nth}").into()); - if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) { - // Put the linkback at the end of the last paragraph instead - // of on a line by itself. - fn_events.insert(fn_events.len() - 1, backlink); - } else { - // Not a clear place to put it in this circumstance, so put it - // at the end. - fn_events.push(backlink); - } - } - fn_events.push(Event::Html("
  2. \n".into())); - html::push_html(body, fn_events.into_iter()); - } - - body.push_str("
"); -} - -/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to. -fn wrap_tables(event: Event<'_>) -> (Option>, Option>) { - match event { - Event::Start(Tag::Table(_)) => ( - Some(Event::Html(r#"
"#.into())), - Some(event), - ), - Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"
"#.into()))), - _ => (Some(event), None), - } -} - -fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { - match event { - Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => { - let info: String = info - .chars() - .map(|x| match x { - ' ' | '\t' => ',', - _ => x, - }) - .filter(|ch| !ch.is_whitespace()) - .collect(); - - Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info)))) - } - _ => event, - } -} - /// Prints a "backtrace" of some `Error`. pub fn log_backtrace(e: &Error) { error!("Error: {}", e); @@ -424,27 +89,6 @@ pub fn log_backtrace(e: &Error) { } } -/// Escape characters to make it safe for an HTML string. -pub fn special_escape(mut s: &str) -> String { - let mut escaped = String::with_capacity(s.len()); - let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&']; - while let Some(next) = s.find(needs_escape) { - escaped.push_str(&s[..next]); - match s.as_bytes()[next] { - b'<' => escaped.push_str("&lt;"), - b'>' => escaped.push_str("&gt;"), - b'\'' => escaped.push_str("&#39;"), - b'"' => escaped.push_str("&quot;"), - b'\\' => escaped.push_str("&#92;"), - b'&' => escaped.push_str("&amp;"), - _ => unreachable!(), - } - s = &s[next + 1..]; - } - escaped.push_str(s); - escaped -} - /// Escape `<` and `>` for HTML. pub fn bracket_escape(mut s: &str) -> String { let mut escaped = String::with_capacity(s.len()); @@ -464,143 +108,7 @@ pub fn bracket_escape(mut s: &str) -> String { #[cfg(test)] mod tests { - use super::{bracket_escape, special_escape}; - - mod render_markdown { - use super::super::render_markdown; - - #[test] - fn preserves_external_links() { - assert_eq!( - render_markdown("[example](https://www.rust-lang.org/)", false), - "

example

\n" - ); - } - - #[test] - fn it_can_adjust_markdown_links() { - assert_eq!( - render_markdown("[example](example.md)", false), - "

example

\n" - ); - assert_eq!( - render_markdown("[example_anchor](example.md#anchor)", false), - "

example_anchor

\n" - ); - - // this anchor contains 'md' inside of it - assert_eq!( - render_markdown("[phantom data](foo.html#phantomdata)", false), - "

phantom data

\n" - ); - } - - #[test] - fn it_can_wrap_tables() { - let src = r#" -| Original | Punycode | Punycode + Encoding | -|-----------------|-----------------|---------------------| -| føø | f-5gaa | f_5gaa | -"#; - let out = r#" -
- -
OriginalPunycodePunycode + Encoding
føøf-5gaaf_5gaa
-
-"#.trim(); - assert_eq!(render_markdown(src, false), out); - } - - #[test] - fn it_can_keep_quotes_straight() { - assert_eq!(render_markdown("'one'", false), "

'one'

\n"); - } - - #[test] - fn it_can_make_quotes_curly_except_when_they_are_in_code() { - let input = r#" -'one' -``` -'two' -``` -`'three'` 'four'"#; - let expected = r#"

‘one’

-
'two'
-
-

'three' ‘four’

-"#; - assert_eq!(render_markdown(input, true), expected); - } - - #[test] - fn whitespace_outside_of_codeblock_header_is_preserved() { - let input = r#" -some text with spaces -```rust -fn main() { -// code inside is unchanged -} -``` -more text with spaces -"#; - - let expected = r#"

some text with spaces

-
fn main() {
-// code inside is unchanged
-}
-
-

more text with spaces

-"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); - } - - #[test] - fn rust_code_block_properties_are_passed_as_space_delimited_class() { - let input = r#" -```rust,no_run,should_panic,property_3 -``` -"#; - - let expected = r#"
-"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); - } - - #[test] - fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() { - let input = r#" -```rust, no_run,,,should_panic , ,property_3 -``` -"#; - - let expected = r#"
-"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); - } - - #[test] - fn rust_code_block_without_properties_has_proper_html_class() { - let input = r#" -```rust -``` -"#; - - let expected = r#"
-"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); - - let input = r#" -```rust -``` -"#; - assert_eq!(render_markdown(input, false), expected); - assert_eq!(render_markdown(input, true), expected); - } - } + use super::bracket_escape; #[allow(deprecated)] mod id_from_content { @@ -690,17 +198,4 @@ more text with spaces assert_eq!(bracket_escape("'"), "'"); assert_eq!(bracket_escape("\\"), "\\"); } - - #[test] - fn escaped_special() { - assert_eq!(special_escape(""), ""); - assert_eq!(special_escape("<"), "&lt;"); - assert_eq!(special_escape(">"), "&gt;"); - assert_eq!(special_escape("<>"), "&lt;&gt;"); - assert_eq!(special_escape("<test>"), "&lt;test&gt;"); - assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b"); - assert_eq!(special_escape("'"), "&#39;"); - assert_eq!(special_escape("\\"), "&#92;"); - assert_eq!(special_escape("&"), "&amp;"); - } } diff --git a/crates/mdbook-markdown/Cargo.toml b/crates/mdbook-markdown/Cargo.toml new file mode 100644 index 00000000..0dc3e723 --- /dev/null +++ b/crates/mdbook-markdown/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "mdbook-markdown" +version = "0.5.0-alpha.1" +description = "Markdown processing used in mdBook" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +log.workspace = true +pulldown-cmark.workspace = true +regex.workspace = true + +[lints] +workspace = true diff --git a/crates/mdbook-markdown/src/lib.rs b/crates/mdbook-markdown/src/lib.rs new file mode 100644 index 00000000..5b506fa7 --- /dev/null +++ b/crates/mdbook-markdown/src/lib.rs @@ -0,0 +1,367 @@ +//! Markdown processing used in mdBook. 
+ +use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html}; +use regex::Regex; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::Write; +use std::path::Path; +use std::sync::LazyLock; + +pub use pulldown_cmark; + +#[cfg(test)] +mod tests; + +/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. +pub fn render_markdown(text: &str, smart_punctuation: bool) -> String { + render_markdown_with_path(text, smart_punctuation, None) +} + +/// Creates a new pulldown-cmark parser of the given text. +pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> { + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_STRIKETHROUGH); + opts.insert(Options::ENABLE_TASKLISTS); + opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); + if smart_punctuation { + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + } + Parser::new_ext(text, opts) +} + +/// Renders markdown to HTML. +/// +/// `path` should only be set if this is being generated for the consolidated +/// print page. It should point to the page being rendered relative to the +/// root of the book. +pub fn render_markdown_with_path( + text: &str, + smart_punctuation: bool, + path: Option<&Path>, +) -> String { + let mut body = String::with_capacity(text.len() * 3 / 2); + + // Based on + // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs + + // This handling of footnotes is a two-pass process. This is done to + // support linkbacks, little arrows that allow you to jump back to the + // footnote reference. The first pass collects the footnote definitions. + // The second pass modifies those definitions to include the linkbacks, + // and inserts the definitions back into the `events` list. + + // This is a map of name -> (number, count) + // `name` is the name of the footnote. 
+ // `number` is the footnote number displayed in the output. + // `count` is the number of references to this footnote (used for multiple + // linkbacks, and checking for unused footnotes). + let mut footnote_numbers = HashMap::new(); + // This is a map of name -> Vec + // `name` is the name of the footnote. + // The events list is the list of events needed to build the footnote definition. + let mut footnote_defs = HashMap::new(); + + // The following are used when currently processing a footnote definition. + // + // This is the name of the footnote (escaped). + let mut in_footnote_name = String::new(); + // This is the list of events to build the footnote definition. + let mut in_footnote = Vec::new(); + + let events = new_cmark_parser(text, smart_punctuation) + .map(clean_codeblock_headers) + .map(|event| adjust_links(event, path)) + .flat_map(|event| { + let (a, b) = wrap_tables(event); + a.into_iter().chain(b) + }) + // Footnote rewriting must go last to ensure inner definition contents + // are processed (since they get pulled out of the initial stream). 
+ .filter_map(|event| { + match event { + Event::Start(Tag::FootnoteDefinition(name)) => { + if !in_footnote.is_empty() { + log::warn!("internal bug: nested footnote not expected in {path:?}"); + } + in_footnote_name = special_escape(&name); + None + } + Event::End(TagEnd::FootnoteDefinition) => { + let def_events = std::mem::take(&mut in_footnote); + let name = std::mem::take(&mut in_footnote_name); + + if footnote_defs.contains_key(&name) { + log::warn!( + "footnote `{name}` in {} defined multiple times - \ + not updating to new definition", + path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) + ); + } else { + footnote_defs.insert(name, def_events); + } + None + } + Event::FootnoteReference(name) => { + let name = special_escape(&name); + let len = footnote_numbers.len() + 1; + let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0)); + *count += 1; + let html = Event::Html( + format!( + "\ + {n}\ + " + ) + .into(), + ); + if in_footnote_name.is_empty() { + Some(html) + } else { + // While inside a footnote, we need to accumulate. + in_footnote.push(html); + None + } + } + // While inside a footnote, accumulate all events into a local. + _ if !in_footnote_name.is_empty() => { + in_footnote.push(event); + None + } + _ => Some(event), + } + }); + + html::push_html(&mut body, events); + + if !footnote_defs.is_empty() { + add_footnote_defs( + &mut body, + path, + footnote_defs.into_iter().collect(), + &footnote_numbers, + ); + } + + body +} + +/// Adds all footnote definitions into `body`. +fn add_footnote_defs( + body: &mut String, + path: Option<&Path>, + mut defs: Vec<(String, Vec>)>, + numbers: &HashMap, +) { + // Remove unused. 
+ defs.retain(|(name, _)| { + if !numbers.contains_key(name) { + log::warn!( + "footnote `{name}` in `{}` is defined but not referenced", + path.map_or_else(|| Cow::from(""), |p| p.to_string_lossy()) + ); + false + } else { + true + } + }); + + defs.sort_by_cached_key(|(name, _)| numbers[name].0); + + body.push_str( + "
\n\ +
    ", + ); + + // Insert the backrefs to the definition, and put the definitions in the output. + for (name, mut fn_events) in defs { + let count = numbers[&name].1; + fn_events.insert( + 0, + Event::Html(format!("
  1. ").into()), + ); + // Generate the linkbacks. + for usage in 1..=count { + let nth = if usage == 1 { + String::new() + } else { + usage.to_string() + }; + let backlink = + Event::Html(format!(" ↩{nth}").into()); + if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) { + // Put the linkback at the end of the last paragraph instead + // of on a line by itself. + fn_events.insert(fn_events.len() - 1, backlink); + } else { + // Not a clear place to put it in this circumstance, so put it + // at the end. + fn_events.push(backlink); + } + } + fn_events.push(Event::Html("
  2. \n".into())); + html::push_html(body, fn_events.into_iter()); + } + + body.push_str("
"); +} + +/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to. +fn wrap_tables(event: Event<'_>) -> (Option>, Option>) { + match event { + Event::Start(Tag::Table(_)) => ( + Some(Event::Html(r#"
"#.into())), + Some(event), + ), + Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"
"#.into()))), + _ => (Some(event), None), + } +} + +fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { + match event { + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => { + let info: String = info + .chars() + .map(|x| match x { + ' ' | '\t' => ',', + _ => x, + }) + .filter(|ch| !ch.is_whitespace()) + .collect(); + + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info)))) + } + _ => event, + } +} + +/// Fix links to the correct location. +/// +/// This adjusts links, such as turning `.md` extensions to `.html`. +/// +/// `path` is the path to the page being rendered relative to the root of the +/// book. This is used for the `print.html` page so that links on the print +/// page go to the original location. Normal page rendering sets `path` to +/// None. Ideally, print page links would link to anchors on the print page, +/// but that is very difficult. +fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { + static SCHEME_LINK: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap()); + static MD_LINK: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap()); + + fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + if dest.starts_with('#') { + // Fragment-only link. + if let Some(path) = path { + let mut base = path.display().to_string(); + if base.ends_with(".md") { + base.replace_range(base.len() - 3.., ".html"); + } + return format!("{base}{dest}").into(); + } else { + return dest; + } + } + // Don't modify links with schemes like `https`. + if !SCHEME_LINK.is_match(&dest) { + // This is a relative link, adjust it as necessary. 
+ let mut fixed_link = String::new(); + if let Some(path) = path { + let base = path + .parent() + .expect("path can't be empty") + .to_str() + .expect("utf-8 paths only"); + if !base.is_empty() { + write!(fixed_link, "{base}/").unwrap(); + } + } + + if let Some(caps) = MD_LINK.captures(&dest) { + fixed_link.push_str(&caps["link"]); + fixed_link.push_str(".html"); + if let Some(anchor) = caps.name("anchor") { + fixed_link.push_str(anchor.as_str()); + } + } else { + fixed_link.push_str(&dest); + }; + return CowStr::from(fixed_link); + } + dest + } + + fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + // This is a terrible hack, but should be reasonably reliable. Nobody + // should ever parse a tag with a regex. However, there isn't anything + // in Rust that I know of that is suitable for handling partial html + // fragments like those generated by pulldown_cmark. + // + // There are dozens of HTML tags/attributes that contain paths, so + // feel free to add more tags if desired; these are the only ones I + // care about right now. + static HTML_LINK: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap()); + + HTML_LINK + .replace_all(&html, |caps: &regex::Captures<'_>| { + let fixed = fix(caps[2].into(), path); + format!("{}{}\"", &caps[1], fixed) + }) + .into_owned() + .into() + } + + match event { + Event::Start(Tag::Link { + link_type, + dest_url, + title, + id, + }) => Event::Start(Tag::Link { + link_type, + dest_url: fix(dest_url, path), + title, + id, + }), + Event::Start(Tag::Image { + link_type, + dest_url, + title, + id, + }) => Event::Start(Tag::Image { + link_type, + dest_url: fix(dest_url, path), + title, + id, + }), + Event::Html(html) => Event::Html(fix_html(html, path)), + Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)), + _ => event, + } +} + +/// Escape characters to make it safe for an HTML string. 
+pub fn special_escape(mut s: &str) -> String { + let mut escaped = String::with_capacity(s.len()); + let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&']; + while let Some(next) = s.find(needs_escape) { + escaped.push_str(&s[..next]); + match s.as_bytes()[next] { + b'<' => escaped.push_str("&lt;"), + b'>' => escaped.push_str("&gt;"), + b'\'' => escaped.push_str("&#39;"), + b'"' => escaped.push_str("&quot;"), + b'\\' => escaped.push_str("&#92;"), + b'&' => escaped.push_str("&amp;"), + _ => unreachable!(), + } + s = &s[next + 1..]; + } + escaped.push_str(s); + escaped +} diff --git a/crates/mdbook-markdown/src/tests.rs b/crates/mdbook-markdown/src/tests.rs new file mode 100644 index 00000000..7f2aea27 --- /dev/null +++ b/crates/mdbook-markdown/src/tests.rs @@ -0,0 +1,147 @@ +use super::render_markdown; +use super::*; + +#[test] +fn escaped_special() { + assert_eq!(special_escape(""), ""); + assert_eq!(special_escape("<"), "&lt;"); + assert_eq!(special_escape(">"), "&gt;"); + assert_eq!(special_escape("<>"), "&lt;&gt;"); + assert_eq!(special_escape("<test>"), "&lt;test&gt;"); + assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b"); + assert_eq!(special_escape("'"), "&#39;"); + assert_eq!(special_escape("\\"), "&#92;"); + assert_eq!(special_escape("&"), "&amp;"); +} + +#[test] +fn preserves_external_links() { + assert_eq!( + render_markdown("[example](https://www.rust-lang.org/)", false), + "

example

\n" + ); +} + +#[test] +fn it_can_adjust_markdown_links() { + assert_eq!( + render_markdown("[example](example.md)", false), + "

example

\n" + ); + assert_eq!( + render_markdown("[example_anchor](example.md#anchor)", false), + "

example_anchor

\n" + ); + + // this anchor contains 'md' inside of it + assert_eq!( + render_markdown("[phantom data](foo.html#phantomdata)", false), + "

phantom data

\n" + ); +} + +#[test] +fn it_can_wrap_tables() { + let src = r#" +| Original | Punycode | Punycode + Encoding | +|-----------------|-----------------|---------------------| +| føø | f-5gaa | f_5gaa | +"#; + let out = r#" +
+ +
OriginalPunycodePunycode + Encoding
føøf-5gaaf_5gaa
+
+"#.trim(); + assert_eq!(render_markdown(src, false), out); +} + +#[test] +fn it_can_keep_quotes_straight() { + assert_eq!(render_markdown("'one'", false), "

'one'

\n"); +} + +#[test] +fn it_can_make_quotes_curly_except_when_they_are_in_code() { + let input = r#" +'one' +``` +'two' +``` +`'three'` 'four'"#; + let expected = r#"

‘one’

+
'two'
+
+

'three' ‘four’

+"#; + assert_eq!(render_markdown(input, true), expected); +} + +#[test] +fn whitespace_outside_of_codeblock_header_is_preserved() { + let input = r#" +some text with spaces +```rust +fn main() { +// code inside is unchanged +} +``` +more text with spaces +"#; + + let expected = r#"

some text with spaces

+
fn main() {
+// code inside is unchanged
+}
+
+

more text with spaces

+"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); +} + +#[test] +fn rust_code_block_properties_are_passed_as_space_delimited_class() { + let input = r#" +```rust,no_run,should_panic,property_3 +``` +"#; + + let expected = r#"
+"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); +} + +#[test] +fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() { + let input = r#" +```rust, no_run,,,should_panic , ,property_3 +``` +"#; + + let expected = r#"
+"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); +} + +#[test] +fn rust_code_block_without_properties_has_proper_html_class() { + let input = r#" +```rust +``` +"#; + + let expected = r#"
+"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + + let input = r#" +```rust +``` +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); +} diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index e601c82b..1012e907 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -17,6 +17,8 @@ use log::{debug, info, trace, warn}; use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition}; use mdbook_core::utils; use mdbook_core::utils::fs::get_404_output_file; +use mdbook_markdown::{render_markdown, render_markdown_with_path}; + use regex::{Captures, Regex}; use serde_json::json; @@ -57,13 +59,10 @@ impl HtmlHandlebars { .insert("git_repository_edit_url".to_owned(), json!(edit_url)); } - let content = utils::render_markdown(&ch.content, ctx.html_config.smart_punctuation()); + let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation()); - let fixed_content = utils::render_markdown_with_path( - &ch.content, - ctx.html_config.smart_punctuation(), - Some(path), - ); + let fixed_content = + render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation(), Some(path)); if !ctx.is_index && ctx.html_config.print.page_break { // Add page break between chapters // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before @@ -178,8 +177,7 @@ impl HtmlHandlebars { .to_string() } }; - let html_content_404 = - utils::render_markdown(&content_404, html_config.smart_punctuation()); + let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation()); let mut data_404 = data.clone(); let base_url = if let Some(site_url) = &html_config.site_url { diff --git a/src/renderer/html_handlebars/helpers/toc.rs 
b/src/renderer/html_handlebars/helpers/toc.rs index 9528a355..daf2dc39 100644 --- a/src/renderer/html_handlebars/helpers/toc.rs +++ b/src/renderer/html_handlebars/helpers/toc.rs @@ -1,11 +1,10 @@ use std::path::Path; use std::{cmp::Ordering, collections::BTreeMap}; -use mdbook_core::utils::special_escape; - use handlebars::{ Context, Handlebars, Helper, HelperDef, Output, RenderContext, RenderError, RenderErrorReason, }; +use mdbook_markdown::special_escape; // Handlebars helper to construct TOC #[derive(Clone, Copy)] diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs index 054e16d9..02a24b2c 100644 --- a/src/renderer/html_handlebars/search.rs +++ b/src/renderer/html_handlebars/search.rs @@ -8,6 +8,7 @@ use elasticlunr::{Index, IndexBuilder}; use log::{debug, warn}; use mdbook_core::config::{Search, SearchChapterSettings}; use mdbook_core::utils; +use mdbook_markdown::new_cmark_parser; use pulldown_cmark::*; use serde::Serialize; @@ -134,7 +135,7 @@ fn render_item( .with_context(|| "Could not convert HTML path to str")?; let anchor_base = utils::fs::normalize_path(filepath); - let mut p = utils::new_cmark_parser(&chapter.content, false).peekable(); + let mut p = new_cmark_parser(&chapter.content, false).peekable(); let mut in_heading = false; let max_section_depth = u32::from(search_config.heading_split_level); diff --git a/tests/testsuite/markdown.rs b/tests/testsuite/markdown.rs index 3fc08425..d9851559 100644 --- a/tests/testsuite/markdown.rs +++ b/tests/testsuite/markdown.rs @@ -22,10 +22,10 @@ fn footnotes() { cmd.expect_stderr(str![[r#" [TIMESTAMP] [INFO] (mdbook::book): Book building has started [TIMESTAMP] [INFO] (mdbook::book): Running the html backend -[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `multiple-definitions` in defined multiple times - not updating to new definition -[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `` is defined but not referenced -[TIMESTAMP] [WARN] 
(mdbook_core::utils): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition -[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `footnotes.md` is defined but not referenced +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in defined multiple times - not updating to new definition +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `` is defined but not referenced +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition +[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced [TIMESTAMP] [INFO] (mdbook::renderer::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book` "#]]);