feat: site-url absolute links on mdBook 0.5.3 (crates layout)

Port of the 0.4.x site-url absolute-links patch to the 0.5 crates/ layout,
toward upstreaming as PR #1802. When output.html.site-url is set, internal
links and assets are emitted as absolute URLs anchored at site-url, so the
book works under a sub-path (e.g. /cdcidao/) regardless of page depth.

- html/tree.rs: fix_link/fix_html_link rewrite ./ content, image and raw-HTML
  links to {site_url}...; schemes and fragments untouched
- html_handlebars/hbs_renderer.rs: path_to_root = site_url for normal and index
  pages; base_url = site_url only for the toc.html iframe (removed before the
  per-chapter clone so it cannot leak)
- html_handlebars/helpers/resources.rs: {{resource}} honors an explicit
  path_to_root from data (absolute assets) with stock fallback
- html/print.rs: print page honors site-url; internal cross-refs still fold to
  #anchors, non-chapter links keep absolute form
- cmd/serve.rs: --preserve-site-url flag; serve still forces site-url to / for
  local preview but logs the override
- tests/testsuite/rendering*: site_url fixture + tests (content, assets, print,
  no <base> leak, no-regression without site-url)
- guide: document the serve flag and the renderer behavior
This commit is contained in:
Jesús Pérez 2026-06-18 23:03:22 +01:00
parent 9873b35405
commit 0784554c88
13 changed files with 331 additions and 46 deletions

View file

@ -9,15 +9,18 @@ use crate::html::{ChapterTree, Element, serialize};
use crate::utils::{ToUrlPath, id_from_content, normalize_path, unique_id};
use mdbook_core::static_regex;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
/// Takes all the chapter trees, modifies them to be suitable to render for
/// the print page, and returns a string of all the chapters rendered to a
/// single HTML page.
pub(crate) fn render_print_page(mut chapter_trees: Vec<ChapterTree<'_>>) -> String {
pub(crate) fn render_print_page(
mut chapter_trees: Vec<ChapterTree<'_>>,
site_url: Option<&str>,
) -> String {
let (id_remap, mut id_counter) = make_ids_unique(&mut chapter_trees);
let path_to_root_id = make_root_id_map(&mut chapter_trees, &mut id_counter);
rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id);
rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id, site_url);
let mut print_content = String::new();
for ChapterTree { tree, .. } in chapter_trees {
@ -125,6 +128,7 @@ fn rewrite_links(
chapter_trees: &mut [ChapterTree<'_>],
id_remap: &HashMap<PathBuf, HashMap<String, String>>,
path_to_root_id: &HashMap<PathBuf, String>,
site_url: Option<&str>,
) {
static_regex!(
LINK,
@ -149,13 +153,21 @@ fn rewrite_links(
continue;
}
for attr in ["href", "src", "xlink:href"] {
let Some(dest) = el.attr(attr) else {
let Some(dest) = el.attr(attr).map(str::to_string) else {
continue;
};
let Some(caps) = LINK.captures(&dest) else {
// Links emitted under `site-url` are absolute (`{site_url}path`), where
// `path` is relative to the book root. Strip the prefix so the path
// resolves against the print page like any other chapter link,
// instead of being skipped as a scheme.
let (search, root_relative) = match site_url {
Some(site_url) if dest.starts_with(site_url) => (&dest[site_url.len()..], true),
_ => (dest.as_str(), false),
};
let Some(caps) = LINK.captures(search) else {
continue;
};
if caps.name("scheme").is_some() {
if !root_relative && caps.name("scheme").is_some() {
continue;
}
// The lookup_key is the key to look up in the remap table.
@ -164,18 +176,29 @@ fn rewrite_links(
&& let href_path = href_path.as_str()
&& !href_path.is_empty()
{
lookup_key.pop();
lookup_key.push(href_path);
lookup_key = normalize_path(&lookup_key);
if root_relative {
// The path is already relative to the book root.
lookup_key = normalize_path(Path::new(href_path));
} else {
lookup_key.pop();
lookup_key.push(href_path);
lookup_key = normalize_path(&lookup_key);
}
let is_a_chapter = path_to_root_id.contains_key(&lookup_key);
if !is_a_chapter {
// Make the link relative to the print page location.
let mut rel_path = normalize_path(&base.join(href_path)).to_url_path();
// Not part of the print page; rebuild a link to the
// standalone resource, preserving the absolute form for
// `site-url` links and a print-relative path otherwise.
let mut link = if root_relative {
format!("{}{href_path}", site_url.unwrap_or_default())
} else {
normalize_path(&base.join(href_path)).to_url_path()
};
if let Some(anchor) = caps.name("anchor") {
rel_path.push('#');
rel_path.push_str(anchor.as_str());
link.push('#');
link.push_str(anchor.as_str());
}
el.insert_attr(attr, rel_path.into());
el.insert_attr(attr, link.into());
continue;
}
}

View file

@ -542,7 +542,7 @@ where
let href: StrTendril = if matches!(link_type, LinkType::Email) {
format!("mailto:{dest_url}").into()
} else {
fix_link(dest_url).into_tendril()
fix_link(dest_url, self.options.config.site_url.as_deref()).into_tendril()
};
let mut a = Element::new("a");
a.insert_attr("href", href);
@ -558,7 +558,8 @@ where
id: _,
} => {
let mut img = Element::new("img");
let src = fix_link(dest_url).into_tendril();
let src =
fix_link(dest_url, self.options.config.site_url.as_deref()).into_tendril();
img.insert_attr("src", src);
if !title.is_empty() {
img.insert_attr("title", title.into_tendril());
@ -675,7 +676,7 @@ where
self_closing: tag.self_closing,
was_raw: true,
};
fix_html_link(&mut el);
fix_html_link(&mut el, self.options.config.site_url.as_deref());
self.push(Node::Element(el));
if is_closed {
// No end element.
@ -1090,7 +1091,12 @@ fn text_in_node(node: NodeRef<'_, Node>, output: &mut String) {
/// Modifies links to work with HTML.
///
/// For local paths, this changes the `.md` extension to `.html`.
fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> {
///
/// When `site_url` is set (the `output.html.site-url` option), root-relative
/// links written as `./path` are rewritten to absolute `{site_url}path` links,
/// so a book served from a subdirectory resolves cross-chapter links correctly
/// regardless of the page's own depth.
fn fix_link<'a>(link: CowStr<'a>, site_url: Option<&str>) -> CowStr<'a> {
static_regex!(SCHEME_LINK, r"^[a-z][a-z0-9+.-]*:");
static_regex!(MD_LINK, r"(?P<link>.*)\.md(?P<anchor>#.*)?");
@ -1104,7 +1110,7 @@ fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> {
}
// This is a relative link, adjust it as necessary.
if let Some(caps) = MD_LINK.captures(&link) {
let link = if let Some(caps) = MD_LINK.captures(&link) {
let mut fixed_link = String::from(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
@ -1113,17 +1119,26 @@ fn fix_link<'a>(link: CowStr<'a>) -> CowStr<'a> {
CowStr::from(fixed_link)
} else {
link
};
// Anchor root-relative `./` links to the configured site URL.
if let Some(site_url) = site_url
&& let Some(rest) = link.strip_prefix("./")
{
CowStr::from(format!("{site_url}{rest}"))
} else {
link
}
}
/// Calls [`fix_link`] for HTML elements.
fn fix_html_link(el: &mut Element) {
fn fix_html_link(el: &mut Element, site_url: Option<&str>) {
if el.name() != "a" {
return;
}
for attr in ["href", "xlink:href"] {
if let Some(value) = el.attr(attr) {
let fixed = fix_link(value.into());
let fixed = fix_link(value.into(), site_url);
el.insert_attr(attr, fixed.into_tendril());
}
}
@ -1153,3 +1168,56 @@ pub(crate) fn is_void_element(name: &str) -> bool {
| "wbr"
)
}
#[cfg(test)]
mod fix_link_tests {
    use super::fix_link;

    /// Site URL shared by the `site-url` cases below.
    const SITE_URL: &str = "https://example.com/docs/";

    /// Feeds every `(input, expected)` pair through `fix_link` with the given
    /// `site_url` and checks the rewritten link.
    fn assert_links(site_url: Option<&str>, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(&*fix_link(input.into(), site_url), expected);
        }
    }

    #[test]
    fn rewrites_md_extension() {
        assert_links(
            None,
            &[("foo.md", "foo.html"), ("foo.md#frag", "foo.html#frag")],
        );
    }

    #[test]
    fn leaves_schemes_and_fragments_alone() {
        // Each of these must come back exactly as it went in.
        assert_links(
            None,
            &[
                ("https://example.com/x", "https://example.com/x"),
                ("mailto:a@b.c", "mailto:a@b.c"),
                ("#anchor", "#anchor"),
            ],
        );
    }

    #[test]
    fn site_url_anchors_root_relative_links() {
        assert_links(
            Some(SITE_URL),
            &[
                (
                    "./nested/deep.md",
                    "https://example.com/docs/nested/deep.html",
                ),
                // A `./` link without a markdown extension is anchored as well.
                ("./img/logo.png", "https://example.com/docs/img/logo.png"),
                // The fragment survives the rewrite.
                ("./other.md#sec", "https://example.com/docs/other.html#sec"),
            ],
        );
    }

    #[test]
    fn site_url_does_not_touch_schemes_or_non_dot_relative() {
        assert_links(
            Some(SITE_URL),
            &[
                // A link that carries a scheme is never rewritten.
                ("https://rust-lang.org", "https://rust-lang.org"),
                // Without a leading `./` the link stays page-relative; only
                // its extension changes.
                ("sibling.md", "sibling.html"),
            ],
        );
    }
}

View file

@ -83,8 +83,15 @@ impl HtmlHandlebars {
ctx.data.insert("content".to_owned(), json!(content));
ctx.data.insert("chapter_title".to_owned(), json!(ch.name));
ctx.data.insert("title".to_owned(), json!(title));
// With `site-url` configured, every page roots its chrome, navigation,
// sidebar (via `toc.js`) and asset links at the absolute site URL
// instead of a depth-relative prefix.
let path_to_root = match &ctx.html_config.site_url {
Some(site_url) => site_url.clone(),
None => fs::path_to_root(path),
};
ctx.data
.insert("path_to_root".to_owned(), json!(fs::path_to_root(path)));
.insert("path_to_root".to_owned(), json!(path_to_root));
if let Some(ref section) = ch.number {
ctx.data
.insert("section".to_owned(), json!(section.to_string()));
@ -125,7 +132,12 @@ impl HtmlHandlebars {
if prev_ch.is_none() {
ctx.data.insert("path".to_owned(), json!("index.md"));
ctx.data.insert("path_to_root".to_owned(), json!(""));
let index_root = match &ctx.html_config.site_url {
Some(site_url) => site_url.clone(),
None => String::new(),
};
ctx.data
.insert("path_to_root".to_owned(), json!(index_root));
ctx.data.insert("is_index".to_owned(), json!(true));
let rendered_index = ctx.handlebars.render("index", &ctx.data)?;
debug!("Creating index.html from {}", ctx_path);
@ -201,8 +213,9 @@ impl HtmlHandlebars {
handlebars: &Handlebars<'_>,
data: &mut serde_json::Map<String, serde_json::Value>,
chapter_trees: Vec<ChapterTree<'_>>,
site_url: Option<&str>,
) -> Result<String> {
let print_content = crate::html::render_print_page(chapter_trees);
let print_content = crate::html::render_print_page(chapter_trees, site_url);
if let Some(ref title) = ctx.config.book.title {
data.insert("title".to_owned(), json!(title));
@ -214,10 +227,13 @@ impl HtmlHandlebars {
data.insert("is_print".to_owned(), json!(true));
data.insert("path".to_owned(), json!("print.md"));
data.insert("content".to_owned(), json!(print_content));
data.insert(
"path_to_root".to_owned(),
json!(fs::path_to_root(Path::new("print.md"))),
);
// Root the print page chrome, assets and sidebar at the absolute site
// URL when configured, matching the per-chapter behaviour.
let path_to_root = match site_url {
Some(site_url) => site_url.to_owned(),
None => fs::path_to_root(Path::new("print.md")),
};
data.insert("path_to_root".to_owned(), json!(path_to_root));
debug!("Render template");
let rendered = handlebars.render("index", &data)?;
@ -396,9 +412,17 @@ impl Renderer for HtmlHandlebars {
{
data.insert("is_toc_html".to_owned(), json!(true));
data.insert("path".to_owned(), json!("toc.html"));
// The no-JS sidebar fallback iframe contains only root-relative
// chapter links; a `<base href>` of the site URL resolves them (and
// the iframe's own assets) absolutely. Scoped to this render so it
// does not leak into the per-chapter `data` clones below.
if let Some(site_url) = &html_config.site_url {
data.insert("base_url".to_owned(), json!(site_url));
}
let rendered_toc = handlebars.render("toc_html", &data)?;
fs::write(destination.join("toc.html"), rendered_toc)?;
debug!("Creating toc.html ✓");
data.remove("base_url");
data.remove("path");
data.remove("is_toc_html");
}
@ -433,8 +457,13 @@ impl Renderer for HtmlHandlebars {
// Render the print version.
if html_config.print.enable {
let print_rendered =
self.render_print_page(ctx, &handlebars, &mut data, chapter_trees)?;
let print_rendered = self.render_print_page(
ctx,
&handlebars,
&mut data,
chapter_trees,
html_config.site_url.as_deref(),
)?;
fs::write(destination.join("print.html"), print_rendered)?;
debug!("Creating print.html ✓");

View file

@ -27,16 +27,31 @@ impl HelperDef for ResourceHelper {
)
})?;
let base_path = rc
.evaluate(ctx, "@root/path")?
.as_json()
.as_str()
.ok_or_else(|| {
RenderErrorReason::Other("Type error for `path`, string expected".to_owned())
})?
.replace("\"", "");
let path_to_root = utils::fs::path_to_root(&base_path);
// Honor an explicit `path_to_root` from the render data when present
// (the `site-url` feature sets it to the absolute site root). Fall back
// to deriving it from the page path, which is the depth-relative prefix
// used for ordinary builds.
let path_to_root = match rc.evaluate(ctx, "@root/path_to_root") {
Ok(value) => value
.as_json()
.as_str()
.map(|s| s.replace('"', ""))
.unwrap_or_default(),
Err(_) => String::new(),
};
let path_to_root = if path_to_root.is_empty() {
let base_path = rc
.evaluate(ctx, "@root/path")?
.as_json()
.as_str()
.ok_or_else(|| {
RenderErrorReason::Other("Type error for `path`, string expected".to_owned())
})?
.replace("\"", "");
utils::fs::path_to_root(&base_path)
} else {
path_to_root
};
out.write(&path_to_root)?;
out.write(self.hash_map.get(param).map(|p| &p[..]).unwrap_or(&param))?;

View file

@ -44,6 +44,16 @@ book. Relative paths are interpreted relative to the current directory. If
not specified it will default to the value of the `build.build-dir` key in
`book.toml`, or to `./book`.
#### `--preserve-site-url`
By default `serve` overrides the [`output.html.site-url`] setting to `/`, since
the book is hosted at the root of the local server and links must resolve there.
When you have configured a `site-url` (for example to emit absolute links for a
book hosted in a subdirectory), the `--preserve-site-url` flag keeps the
configured value so you can preview those production links locally.
[`output.html.site-url`]: ../format/configuration/renderers.md#html-renderer-options
{{#include arg-watcher.md}}
#### Specify exclude patterns

View file

@ -155,10 +155,14 @@ The following configuration options are available:
- **input-404:** The name of the markdown file used for missing files.
The corresponding output file will be the same, with the extension replaced with `html`.
Defaults to `404.md`.
- **site-url:** The url where the book will be hosted. This is required to ensure
navigation links and script/css imports in the 404 file work correctly, even when accessing
urls in subdirectories. Defaults to `/`. If `site-url` is set,
make sure to use document relative links for your assets, meaning they should not start with `/`.
- **site-url:** The URL or absolute path where the book will be hosted, for
example `/` or `https://example.com/docs/`. When set, every generated link — page
navigation, the sidebar, static asset imports and the 404 page — is rooted at
this URL, so the book resolves correctly even when served from a subdirectory
or accessed at a deeply nested path. Links written in chapter content with a
leading `./` are treated as relative to the book root and are also anchored to
this URL. When not set, generated links stay relative to each page and the 404
page falls back to `/`. During [`mdbook serve`](../../cli/serve.md) this value
is overridden to `/` for local previewing unless `--preserve-site-url` is passed.
- **cname:** The DNS subdomain or apex domain at which your book will be hosted.
This string will be written to a file named CNAME in the root of your site, as
required by GitHub Pages (see [*Managing a custom domain for your GitHub Pages

View file

@ -43,6 +43,10 @@ pub fn make_subcommand() -> Command {
.value_parser(NonEmptyStringValueParser::new())
.help("Port to use for HTTP connections"),
)
.arg(
arg!(--"preserve-site-url" "Keep the configured `output.html.site-url` instead of \
overriding it to `/` for local serving (useful to preview production absolute links)"),
)
.arg_open()
.arg_watcher()
}
@ -55,16 +59,32 @@ pub fn execute(args: &ArgMatches) -> Result<()> {
let port = args.get_one::<String>("port").unwrap();
let hostname = args.get_one::<String>("hostname").unwrap();
let open_browser = args.get_flag("open");
let preserve_site_url = args.get_flag("preserve-site-url");
let address = format!("{hostname}:{port}");
// The book is served from the root of the local HTTP server, so a configured
// `site-url` (used for the 404 page and, when enabled, absolute links) is
// overridden to "/" so links resolve locally. `--preserve-site-url` keeps the
// configured value, e.g. to preview the production absolute links.
if !preserve_site_url
&& let Some(site_url) = book.config.html_config().and_then(|c| c.site_url)
&& site_url != "/"
{
info!(
"overriding `output.html.site-url` (`{site_url}`) to `/` for local serving; \
pass `--preserve-site-url` to keep it"
);
}
let update_config = |book: &mut MDBook| {
book.config
.set("output.html.live-reload-endpoint", LIVE_RELOAD_ENDPOINT)
.expect("live-reload-endpoint update failed");
set_dest_dir(args, book);
// Override site-url for local serving of the 404 file
book.config.set("output.html.site-url", "/").unwrap();
if !preserve_site_url {
book.config.set("output.html.site-url", "/").unwrap();
}
};
update_config(&mut book);
book.build()?;

View file

@ -322,3 +322,98 @@ HTML tags must be closed before exiting a markdown element.
str![[r##"<h3 id="option"><a class="header" href="#option">Option<t></t></a></h3>"##]],
);
}
// The following tests cover the `output.html.site-url` feature, which makes
// every generated link absolute (rooted at `site-url`) so a book served from a
// subdirectory resolves cross-chapter, asset, and sidebar links regardless of
// the page's own depth. See https://github.com/rust-lang/mdBook/pull/1802.
// Chapter content links that start with `./` come out anchored at the site
// URL, on a nested page as well as on the index page; a link that already
// carries a scheme (`https`) is emitted unchanged.
#[test]
fn site_url_rewrites_content_links() {
    let to_other = r#"<a href="https://example.com/docs/other.html">other chapter</a>"#;
    let to_deep = r#"<a href="https://example.com/docs/nested/deep.html">deep chapter</a>"#;
    let external = r#"<a href="https://rust-lang.org">external link</a>"#;
    BookTest::from_dir("rendering/site_url")
        .check_file_contains("book/nested/deep.html", to_other)
        .check_file_contains("book/index.html", to_deep)
        .check_file_contains("book/index.html", external);
}
// Even on a nested page, the `path_to_root` constant written into the rendered
// output is the absolute site URL rather than a depth-relative prefix.
#[test]
fn site_url_sets_absolute_path_to_root() {
    let expected = r#"const path_to_root = "https://example.com/docs/";"#;
    BookTest::from_dir("rendering/site_url")
        .check_file_contains("book/nested/deep.html", expected);
}
// The stylesheet link on a nested page starts with the absolute site URL
// instead of a `../` prefix.
#[test]
fn site_url_makes_assets_absolute() {
    // Only the start of the tag is matched; the rest of the file name is left open.
    let stylesheet = r#"<link rel="stylesheet" href="https://example.com/docs/css/general"#;
    BookTest::from_dir("rendering/site_url")
        .check_file_contains("book/nested/deep.html", stylesheet);
}
// `toc.html` (the sidebar fallback used without JavaScript) is rendered with a
// `<base href>` pointing at the site URL.
#[test]
fn site_url_sets_toc_html_base() {
    let base_tag = r#"<base href="https://example.com/docs/">"#;
    BookTest::from_dir("rendering/site_url").check_file_contains("book/toc.html", base_tag);
}
// The `<base href>` emitted for `toc.html` must stay there: neither a nested
// chapter page nor the index page may contain one.
#[test]
fn site_url_no_base_href_on_chapter_pages() {
    let base_tag = "<base href";
    BookTest::from_dir("rendering/site_url")
        .check_file_doesnt_contain("book/nested/deep.html", base_tag)
        .check_file_doesnt_contain("book/index.html", base_tag);
}
// Opt-in check: a book without `site-url` keeps an empty `path_to_root` on the
// index page and emits no `<base href>` on the index page or in `toc.html`.
#[test]
fn site_url_absent_keeps_links_relative() {
    let relative_root = r#"const path_to_root = "";"#;
    let base_tag = "<base href";
    BookTest::init(|_| {})
        .check_file_contains("book/index.html", relative_root)
        .check_file_doesnt_contain("book/index.html", base_tag)
        .check_file_doesnt_contain("book/toc.html", base_tag);
}
// On the print page, `path_to_root` and the stylesheet link use the site URL,
// links between chapters that are on the page become `#` anchors, and the
// external link is emitted unchanged.
#[test]
fn site_url_print_page() {
    let print_page = "book/print.html";
    let root_const = r#"const path_to_root = "https://example.com/docs/";"#;
    let stylesheet = r#"<link rel="stylesheet" href="https://example.com/docs/css/general"#;
    let deep_anchor = r##"<a href="#deep">deep chapter</a>"##;
    let other_anchor = r##"<a href="#other">other chapter</a>"##;
    let external = r#"<a href="https://rust-lang.org">external link</a>"#;
    BookTest::from_dir("rendering/site_url")
        .check_file_contains(print_page, root_const)
        .check_file_contains(print_page, stylesheet)
        .check_file_contains(print_page, deep_anchor)
        .check_file_contains(print_page, other_anchor)
        .check_file_contains(print_page, external);
}

View file

@ -0,0 +1,5 @@
[book]
title = "site_url"
[output.html]
site-url = "https://example.com/docs/"

View file

@ -0,0 +1,5 @@
# Intro
A root-relative link to the [deep chapter](./nested/deep.md).
An [external link](https://rust-lang.org) must stay untouched.

View file

@ -0,0 +1,5 @@
# Summary
- [Intro](README.md)
- [Deep](nested/deep.md)
- [Other](other.md)

View file

@ -0,0 +1,3 @@
# Deep
See the [other chapter](./other.md).

View file

@ -0,0 +1,3 @@
# Other
Back to the [deep chapter](./nested/deep.md).