diff --git a/crates/mdbook-html/src/html/print.rs b/crates/mdbook-html/src/html/print.rs index e325396f..e044f995 100644 --- a/crates/mdbook-html/src/html/print.rs +++ b/crates/mdbook-html/src/html/print.rs @@ -9,15 +9,18 @@ use crate::html::{ChapterTree, Element, serialize}; use crate::utils::{ToUrlPath, id_from_content, normalize_path, unique_id}; use mdbook_core::static_regex; use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; /// Takes all the chapter trees, modifies them to be suitable to render for /// the print page, and returns an string of all the chapters rendered to a /// single HTML page. -pub(crate) fn render_print_page(mut chapter_trees: Vec>) -> String { +pub(crate) fn render_print_page( + mut chapter_trees: Vec>, + site_url: Option<&str>, +) -> String { let (id_remap, mut id_counter) = make_ids_unique(&mut chapter_trees); let path_to_root_id = make_root_id_map(&mut chapter_trees, &mut id_counter); - rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id); + rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id, site_url); let mut print_content = String::new(); for ChapterTree { tree, .. } in chapter_trees { @@ -125,6 +128,7 @@ fn rewrite_links( chapter_trees: &mut [ChapterTree<'_>], id_remap: &HashMap>, path_to_root_id: &HashMap, + site_url: Option<&str>, ) { static_regex!( LINK, @@ -149,13 +153,21 @@ fn rewrite_links( continue; } for attr in ["href", "src", "xlink:href"] { - let Some(dest) = el.attr(attr) else { + let Some(dest) = el.attr(attr).map(str::to_string) else { continue; }; - let Some(caps) = LINK.captures(&dest) else { + // Links emitted under `site-url` are absolute (`{site_url}path`) + // and root-relative (anchored at the book root). Strip the + // prefix so the path resolves against the print page like any + // other chapter link, instead of being skipped as a scheme. + let (search, root_relative) = match site_url { + Some(site_url) if dest.starts_with(site_url) => (&dest[site_url.len()..], true), + _ => (dest.as_str(), false), + }; + let Some(caps) = LINK.captures(search) else { continue; }; - if caps.name("scheme").is_some() { + if !root_relative && caps.name("scheme").is_some() { continue; } // The lookup_key is the key to look up in the remap table. @@ -164,18 +176,29 @@ fn rewrite_links( && let href_path = href_path.as_str() && !href_path.is_empty() { - lookup_key.pop(); - lookup_key.push(href_path); - lookup_key = normalize_path(&lookup_key); + if root_relative { + // The path is already relative to the book root. + lookup_key = normalize_path(Path::new(href_path)); + } else { + lookup_key.pop(); + lookup_key.push(href_path); + lookup_key = normalize_path(&lookup_key); + } let is_a_chapter = path_to_root_id.contains_key(&lookup_key); if !is_a_chapter { - // Make the link relative to the print page location. - let mut rel_path = normalize_path(&base.join(href_path)).to_url_path(); + // Not part of the print page; rebuild a link to the + // standalone resource, preserving the absolute form for + // `site-url` links and a print-relative path otherwise. + let mut link = if root_relative { + format!("{}{href_path}", site_url.unwrap_or_default()) + } else { + normalize_path(&base.join(href_path)).to_url_path() + }; if let Some(anchor) = caps.name("anchor") { - rel_path.push('#'); - rel_path.push_str(anchor.as_str()); + link.push('#'); + link.push_str(anchor.as_str()); } - el.insert_attr(attr, rel_path.into()); + el.insert_attr(attr, link.into()); continue; } } diff --git a/crates/mdbook-html/src/html/tree.rs b/crates/mdbook-html/src/html/tree.rs index 5cb97ce3..5f8cb871 100644 --- a/crates/mdbook-html/src/html/tree.rs +++ b/crates/mdbook-html/src/html/tree.rs @@ -542,7 +542,7 @@ where let href: StrTendril = if matches!(link_type, LinkType::Email) { format!("mailto:{dest_url}").into() } else { - fix_link(dest_url).into_tendril() + fix_link(dest_url, self.options.config.site_url.as_deref()).into_tendril() }; let mut a = Element::new("a"); a.insert_attr("href", href); @@ -558,7 +558,8 @@ where id: _, } => { let mut img = Element::new("img"); - let src = fix_link(dest_url).into_tendril(); + let src = + fix_link(dest_url, self.options.config.site_url.as_deref()).into_tendril(); img.insert_attr("src", src); if !title.is_empty() { img.insert_attr("title", title.into_tendril()); @@ -675,7 +676,7 @@ where self_closing: tag.self_closing, was_raw: true, }; - fix_html_link(&mut el); + fix_html_link(&mut el, self.options.config.site_url.as_deref()); self.push(Node::Element(el)); if is_closed { // No end element. @@ -1090,7 +1091,12 @@ fn text_in_node(node: NodeRef<'_, Node>, output: &mut String) { /// Modifies links to work with HTML. /// /// For local paths, this changes the `.md` extension to `.html`. -fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> { +/// +/// When `site_url` is set (the `output.html.site-url` option), root-relative +/// links written as `./path` are rewritten to absolute `{site_url}path` links, +/// so a book served from a subdirectory resolves cross-chapter links correctly +/// regardless of the page's own depth. +fn fix_link<'a>(link: CowStr<'a>, site_url: Option<&str>) -> CowStr<'a> { static_regex!(SCHEME_LINK, r"^[a-z][a-z0-9+.-]*:"); static_regex!(MD_LINK, r"(?P.*)\.md(?P#.*)?"); @@ -1104,7 +1110,7 @@ fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> { } // This is a relative link, adjust it as necessary. - if let Some(caps) = MD_LINK.captures(&link) { + let link = if let Some(caps) = MD_LINK.captures(&link) { let mut fixed_link = String::from(&caps["link"]); fixed_link.push_str(".html"); if let Some(anchor) = caps.name("anchor") { @@ -1113,17 +1119,26 @@ fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> { CowStr::from(fixed_link) } else { link + }; + + // Anchor root-relative `./` links to the configured site URL. + if let Some(site_url) = site_url + && let Some(rest) = link.strip_prefix("./") + { + CowStr::from(format!("{site_url}{rest}")) + } else { + link } } /// Calls [`fix_link`] for HTML elements. -fn fix_html_link(el: &mut Element) { +fn fix_html_link(el: &mut Element, site_url: Option<&str>) { if el.name() != "a" { return; } for attr in ["href", "xlink:href"] { if let Some(value) = el.attr(attr) { - let fixed = fix_link(value.into()); + let fixed = fix_link(value.into(), site_url); el.insert_attr(attr, fixed.into_tendril()); } } @@ -1153,3 +1168,56 @@ pub(crate) fn is_void_element(name: &str) -> bool { | "wbr" ) } + +#[cfg(test)] +mod fix_link_tests { + use super::fix_link; + + #[test] + fn rewrites_md_extension() { + assert_eq!(&*fix_link("foo.md".into(), None), "foo.html"); + assert_eq!(&*fix_link("foo.md#frag".into(), None), "foo.html#frag"); + } + + #[test] + fn leaves_schemes_and_fragments_alone() { + assert_eq!( + &*fix_link("https://example.com/x".into(), None), + "https://example.com/x" + ); + assert_eq!(&*fix_link("mailto:a@b.c".into(), None), "mailto:a@b.c"); + assert_eq!(&*fix_link("#anchor".into(), None), "#anchor"); + } + + #[test] + fn site_url_anchors_root_relative_links() { + let site = Some("https://example.com/docs/"); + assert_eq!( + &*fix_link("./nested/deep.md".into(), site), + "https://example.com/docs/nested/deep.html" + ); + // Non-markdown root-relative links are anchored too. + assert_eq!( + &*fix_link("./img/logo.png".into(), site), + "https://example.com/docs/img/logo.png" + ); + // The anchor is preserved through the rewrite. + assert_eq!( + &*fix_link("./other.md#sec".into(), site), + "https://example.com/docs/other.html#sec" + ); + } + + #[test] + fn site_url_does_not_touch_schemes_or_non_dot_relative() { + let site = Some("https://example.com/docs/"); + // Absolute/scheme links are never rewritten. + assert_eq!( + &*fix_link("https://rust-lang.org".into(), site), + "https://rust-lang.org" + ); + // Only `./`-prefixed links are treated as root-relative; bare relative + // links keep their page-relative meaning. + assert_eq!(&*fix_link("sibling.md".into(), site), "sibling.html"); + } +} diff --git a/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs b/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs index 8edac3ca..9a42c59a 100644 --- a/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs +++ b/crates/mdbook-html/src/html_handlebars/hbs_renderer.rs @@ -83,8 +83,15 @@ impl HtmlHandlebars { ctx.data.insert("content".to_owned(), json!(content)); ctx.data.insert("chapter_title".to_owned(), json!(ch.name)); ctx.data.insert("title".to_owned(), json!(title)); + // With `site-url` configured, every page roots its chrome, navigation, + // sidebar (via `toc.js`) and asset links at the absolute site URL + // instead of a depth-relative prefix. + let path_to_root = match &ctx.html_config.site_url { + Some(site_url) => site_url.clone(), + None => fs::path_to_root(path), + }; ctx.data - .insert("path_to_root".to_owned(), json!(fs::path_to_root(path))); + .insert("path_to_root".to_owned(), json!(path_to_root)); if let Some(ref section) = ch.number { ctx.data .insert("section".to_owned(), json!(section.to_string())); @@ -125,7 +132,12 @@ impl HtmlHandlebars { if prev_ch.is_none() { ctx.data.insert("path".to_owned(), json!("index.md")); - ctx.data.insert("path_to_root".to_owned(), json!("")); + let index_root = match &ctx.html_config.site_url { + Some(site_url) => site_url.clone(), + None => String::new(), + }; + ctx.data + .insert("path_to_root".to_owned(), json!(index_root)); ctx.data.insert("is_index".to_owned(), json!(true)); let rendered_index = ctx.handlebars.render("index", &ctx.data)?; debug!("Creating index.html from {}", ctx_path); @@ -201,8 +213,9 @@ impl HtmlHandlebars { handlebars: &Handlebars<'_>, data: &mut serde_json::Map, chapter_trees: Vec>, + site_url: Option<&str>, ) -> Result { - let print_content = crate::html::render_print_page(chapter_trees); + let print_content = crate::html::render_print_page(chapter_trees, site_url); if let Some(ref title) = ctx.config.book.title { data.insert("title".to_owned(), json!(title)); @@ -214,10 +227,13 @@ impl HtmlHandlebars { data.insert("is_print".to_owned(), json!(true)); data.insert("path".to_owned(), json!("print.md")); data.insert("content".to_owned(), json!(print_content)); - data.insert( - "path_to_root".to_owned(), - json!(fs::path_to_root(Path::new("print.md"))), - ); + // Root the print page chrome, assets and sidebar at the absolute site + // URL when configured, matching the per-chapter behaviour. + let path_to_root = match site_url { + Some(site_url) => site_url.to_owned(), + None => fs::path_to_root(Path::new("print.md")), + }; + data.insert("path_to_root".to_owned(), json!(path_to_root)); debug!("Render template"); let rendered = handlebars.render("index", &data)?; @@ -396,9 +412,17 @@ impl Renderer for HtmlHandlebars { { data.insert("is_toc_html".to_owned(), json!(true)); data.insert("path".to_owned(), json!("toc.html")); + // The no-JS sidebar fallback iframe contains only root-relative + // chapter links; a `` of the site URL resolves them (and + // the iframe's own assets) absolutely. Scoped to this render so it + // does not leak into the per-chapter `data` clones below. + if let Some(site_url) = &html_config.site_url { + data.insert("base_url".to_owned(), json!(site_url)); + } let rendered_toc = handlebars.render("toc_html", &data)?; fs::write(destination.join("toc.html"), rendered_toc)?; debug!("Creating toc.html ✓"); + data.remove("base_url"); data.remove("path"); data.remove("is_toc_html"); } @@ -433,8 +457,13 @@ impl Renderer for HtmlHandlebars { // Render the print version. if html_config.print.enable { - let print_rendered = - self.render_print_page(ctx, &handlebars, &mut data, chapter_trees)?; + let print_rendered = self.render_print_page( + ctx, + &handlebars, + &mut data, + chapter_trees, + html_config.site_url.as_deref(), + )?; fs::write(destination.join("print.html"), print_rendered)?; debug!("Creating print.html ✓"); diff --git a/crates/mdbook-html/src/html_handlebars/helpers/resources.rs b/crates/mdbook-html/src/html_handlebars/helpers/resources.rs index f3cf9cd4..b5572c7e 100644 --- a/crates/mdbook-html/src/html_handlebars/helpers/resources.rs +++ b/crates/mdbook-html/src/html_handlebars/helpers/resources.rs @@ -27,16 +27,31 @@ impl HelperDef for ResourceHelper { ) })?; - let base_path = rc - .evaluate(ctx, "@root/path")? - .as_json() - .as_str() - .ok_or_else(|| { - RenderErrorReason::Other("Type error for `path`, string expected".to_owned()) - })? - .replace("\"", ""); - - let path_to_root = utils::fs::path_to_root(&base_path); + // Honor an explicit `path_to_root` from the render data when present + // (the `site-url` feature sets it to the absolute site root). Fall back + // to deriving it from the page path, which is the depth-relative prefix + // used for ordinary builds. + let path_to_root = match rc.evaluate(ctx, "@root/path_to_root") { + Ok(value) => value + .as_json() + .as_str() + .map(|s| s.replace('"', "")) + .unwrap_or_default(), + Err(_) => String::new(), + }; + let path_to_root = if path_to_root.is_empty() { + let base_path = rc + .evaluate(ctx, "@root/path")? + .as_json() + .as_str() + .ok_or_else(|| { + RenderErrorReason::Other("Type error for `path`, string expected".to_owned()) + })? + .replace("\"", ""); + utils::fs::path_to_root(&base_path) + } else { + path_to_root + }; out.write(&path_to_root)?; out.write(self.hash_map.get(param).map(|p| &p[..]).unwrap_or(¶m))?; diff --git a/guide/src/cli/serve.md b/guide/src/cli/serve.md index 43aa42ec..d7d06823 100644 --- a/guide/src/cli/serve.md +++ b/guide/src/cli/serve.md @@ -44,6 +44,16 @@ book. Relative paths are interpreted relative to the current directory. If not specified it will default to the value of the `build.build-dir` key in `book.toml`, or to `./book`. +#### `--preserve-site-url` + +By default `serve` overrides the [`output.html.site-url`] setting to `/`, since +the book is hosted at the root of the local server and links must resolve there. +When you have configured a `site-url` (for example to emit absolute links for a +book hosted in a subdirectory), the `--preserve-site-url` flag keeps the +configured value so you can preview those production links locally. + +[`output.html.site-url`]: ../format/configuration/renderers.md#html-renderer-options + {{#include arg-watcher.md}} #### Specify exclude patterns diff --git a/guide/src/format/configuration/renderers.md b/guide/src/format/configuration/renderers.md index 22dfd425..3991617a 100644 --- a/guide/src/format/configuration/renderers.md +++ b/guide/src/format/configuration/renderers.md @@ -155,10 +155,14 @@ The following configuration options are available: - **input-404:** The name of the markdown file used for missing files. The corresponding output file will be the same, with the extension replaced with `html`. Defaults to `404.md`. -- **site-url:** The url where the book will be hosted. This is required to ensure - navigation links and script/css imports in the 404 file work correctly, even when accessing - urls in subdirectories. Defaults to `/`. If `site-url` is set, - make sure to use document relative links for your assets, meaning they should not start with `/`. +- **site-url:** The absolute URL where the book will be hosted, for example `/` + or `https://example.com/docs/`. When set, every generated link — page + navigation, the sidebar, static asset imports and the 404 page — is rooted at + this URL, so the book resolves correctly even when served from a subdirectory + or accessed at a deeply nested path. Root-relative links written in chapter + content as `./path` are also anchored to this URL. Defaults to `/`. During + [`mdbook serve`](../../cli/serve.md) this value is overridden to `/` for local + previewing unless `--preserve-site-url` is passed. - **cname:** The DNS subdomain or apex domain at which your book will be hosted. This string will be written to a file named CNAME in the root of your site, as required by GitHub Pages (see [*Managing a custom domain for your GitHub Pages diff --git a/src/cmd/serve.rs b/src/cmd/serve.rs index 255c077d..5f603117 100644 --- a/src/cmd/serve.rs +++ b/src/cmd/serve.rs @@ -43,6 +43,10 @@ pub fn make_subcommand() -> Command { .value_parser(NonEmptyStringValueParser::new()) .help("Port to use for HTTP connections"), ) + .arg( + arg!(--"preserve-site-url" "Keep the configured `output.html.site-url` instead of \ + overriding it to `/` for local serving (useful to preview production absolute links)"), + ) .arg_open() .arg_watcher() } @@ -55,16 +59,32 @@ pub fn execute(args: &ArgMatches) -> Result<()> { let port = args.get_one::("port").unwrap(); let hostname = args.get_one::("hostname").unwrap(); let open_browser = args.get_flag("open"); + let preserve_site_url = args.get_flag("preserve-site-url"); let address = format!("{hostname}:{port}"); + // The book is served from the root of the local HTTP server, so a configured + // `site-url` (used for the 404 page and, when enabled, absolute links) is + // overridden to "/" so links resolve locally. `--preserve-site-url` keeps the + // configured value, e.g. to preview the production absolute links. + if !preserve_site_url + && let Some(site_url) = book.config.html_config().and_then(|c| c.site_url) + && site_url != "/" + { + info!( + "overriding `output.html.site-url` (`{site_url}`) to `/` for local serving; \ + pass `--preserve-site-url` to keep it" + ); + } + let update_config = |book: &mut MDBook| { book.config .set("output.html.live-reload-endpoint", LIVE_RELOAD_ENDPOINT) .expect("live-reload-endpoint update failed"); set_dest_dir(args, book); - // Override site-url for local serving of the 404 file - book.config.set("output.html.site-url", "/").unwrap(); + if !preserve_site_url { + book.config.set("output.html.site-url", "/").unwrap(); + } }; update_config(&mut book); book.build()?; diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index c1288298..72155f67 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -322,3 +322,98 @@ HTML tags must be closed before exiting a markdown element. str![[r##"

Option

"##]], ); } + +// The following tests cover the `output.html.site-url` feature, which makes +// every generated link absolute (rooted at `site-url`) so a book served from a +// subdirectory resolves cross-chapter, asset, and sidebar links regardless of +// the page's own depth. See https://github.com/rust-lang/mdBook/pull/1802. + +// Root-relative `./` links written in chapter content are anchored to the site +// URL, while links with a scheme (e.g. `https`) are left untouched. +#[test] +fn site_url_rewrites_content_links() { + BookTest::from_dir("rendering/site_url") + .check_file_contains( + "book/nested/deep.html", + "other chapter", + ) + .check_file_contains( + "book/index.html", + "deep chapter", + ) + .check_file_contains( + "book/index.html", + "external link", + ); +} + +// `path_to_root` (used by the page chrome, prev/next navigation, and the +// JavaScript sidebar in `toc.js`) becomes the absolute site URL on every page, +// independent of how deeply the page is nested. +#[test] +fn site_url_sets_absolute_path_to_root() { + BookTest::from_dir("rendering/site_url").check_file_contains( + "book/nested/deep.html", + "const path_to_root = \"https://example.com/docs/\";", + ); +} + +// Static assets resolved through the `{{resource}}` helper are emitted with the +// absolute site URL rather than a depth-relative `../` prefix. +#[test] +fn site_url_makes_assets_absolute() { + BookTest::from_dir("rendering/site_url").check_file_contains( + "book/nested/deep.html", + "` of the site URL so its root-relative chapter links resolve +// absolutely. +#[test] +fn site_url_sets_toc_html_base() { + BookTest::from_dir("rendering/site_url") + .check_file_contains("book/toc.html", ""); +} + +// The `` from `toc.html` must not leak onto regular chapter pages, +// which would break their page-relative content links. +#[test] +fn site_url_no_base_href_on_chapter_pages() { + BookTest::from_dir("rendering/site_url") + .check_file_doesnt_contain("book/nested/deep.html", "` +// is emitted: the feature is strictly opt-in. +#[test] +fn site_url_absent_keeps_links_relative() { + BookTest::init(|_| {}) + .check_file_contains("book/index.html", "const path_to_root = \"\";") + .check_file_doesnt_contain("book/index.html", "deep chapter") + .check_file_contains("book/print.html", "other chapter") + .check_file_contains( + "book/print.html", + "external link", + ); +} diff --git a/tests/testsuite/rendering/site_url/book.toml b/tests/testsuite/rendering/site_url/book.toml new file mode 100644 index 00000000..c27bd1a8 --- /dev/null +++ b/tests/testsuite/rendering/site_url/book.toml @@ -0,0 +1,5 @@ +[book] +title = "site_url" + +[output.html] +site-url = "https://example.com/docs/" diff --git a/tests/testsuite/rendering/site_url/src/README.md b/tests/testsuite/rendering/site_url/src/README.md new file mode 100644 index 00000000..244f7d0c --- /dev/null +++ b/tests/testsuite/rendering/site_url/src/README.md @@ -0,0 +1,5 @@ +# Intro + +A root-relative link to the [deep chapter](./nested/deep.md). + +An [external link](https://rust-lang.org) must stay untouched. diff --git a/tests/testsuite/rendering/site_url/src/SUMMARY.md b/tests/testsuite/rendering/site_url/src/SUMMARY.md new file mode 100644 index 00000000..9b2c921a --- /dev/null +++ b/tests/testsuite/rendering/site_url/src/SUMMARY.md @@ -0,0 +1,5 @@ +# Summary + +- [Intro](README.md) + - [Deep](nested/deep.md) +- [Other](other.md) diff --git a/tests/testsuite/rendering/site_url/src/nested/deep.md b/tests/testsuite/rendering/site_url/src/nested/deep.md new file mode 100644 index 00000000..5b99162c --- /dev/null +++ b/tests/testsuite/rendering/site_url/src/nested/deep.md @@ -0,0 +1,3 @@ +# Deep + +See the [other chapter](./other.md). diff --git a/tests/testsuite/rendering/site_url/src/other.md b/tests/testsuite/rendering/site_url/src/other.md new file mode 100644 index 00000000..c0dfcb5c --- /dev/null +++ b/tests/testsuite/rendering/site_url/src/other.md @@ -0,0 +1,3 @@ +# Other + +Back to the [deep chapter](./nested/deep.md).