From d0bde467e0ba01ba16a06a1d011f1efdb8c048c0 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 4 Nov 2025 16:01:06 -0800 Subject: [PATCH] Lowercase heading IDs This switches from ASCII lowercase to Unicode lowercase when generating heading IDs. This brings mdBook more in line with other tools and sites when they generate heading IDs. The generation still isn't 100% the same as other tools and sites, but it is the same in most cases. Closes https://github.com/rust-lang/mdBook/issues/1059 --- CHANGELOG.md | 7 +++++-- crates/mdbook-html/src/utils.rs | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85620126..8db03518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,8 +58,11 @@ The following is a summary of the changes that may require your attention when u [#2847](https://github.com/rust-lang/mdBook/pull/2847) - Added support for admonitions. These are enabled by default, with the option `output.html.admonitions` to disable it. [#2851](https://github.com/rust-lang/mdBook/pull/2851) -- Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped. This brings the header ID generation closer to other tools and sites. - [#2844](https://github.com/rust-lang/mdBook/pull/2844) +- Header ID generation has some minor changes to bring the ID generation closer to other tools and sites: + - IDs now use Unicode lowercase instead of ASCII lowercase. + [#2922](https://github.com/rust-lang/mdBook/pull/2922) + - Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped. 
+ [#2844](https://github.com/rust-lang/mdBook/pull/2844) ### CLI changes diff --git a/crates/mdbook-html/src/utils.rs b/crates/mdbook-html/src/utils.rs index 6c17b8d5..b5bc2d7c 100644 --- a/crates/mdbook-html/src/utils.rs +++ b/crates/mdbook-html/src/utils.rs @@ -76,10 +76,11 @@ pub(crate) fn unique_id(id: &str, used: &mut HashSet) -> String { pub(crate) fn id_from_content(content: &str) -> String { content .trim() + .to_lowercase() .chars() .filter_map(|ch| { if ch.is_alphanumeric() || ch == '_' || ch == '-' { - Some(ch.to_ascii_lowercase()) + Some(ch) } else if ch.is_whitespace() { Some('-') } else { @@ -120,6 +121,6 @@ mod tests { assert_eq!(id_from_content("한국어"), "한국어"); assert_eq!(id_from_content(""), ""); assert_eq!(id_from_content("中文標題 CJK title"), "中文標題-cjk-title"); - assert_eq!(id_from_content("Über"), "Über"); + assert_eq!(id_from_content("Über"), "über"); } }