135 lines
4.5 KiB
Rust
135 lines
4.5 KiB
Rust
//! Utilities for processing HTML.
|
|
|
|
use std::collections::HashSet;
|
|
use std::path::{Component, Path, PathBuf};
|
|
|
|
/// Lexically normalizes a path by resolving `.` and `..` components.
///
/// Only the textual components of `path` are inspected; the filesystem is
/// never consulted. The rules are:
///
/// - `.` components are dropped.
/// - `..` removes the preceding component when there is one to remove.
/// - `..` that would climb above the root of an absolute path is discarded.
/// - `..` at the start of a relative path (or following another kept `..`)
///   is preserved, so `a/../../b` becomes `../b`.
pub(crate) fn normalize_path(path: &Path) -> PathBuf {
    let mut parts = path.components().peekable();

    // A prefix (e.g. a Windows drive) can only be the first component, so
    // consume it up front and seed the result with it.
    let mut normalized = match parts.peek().copied() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            // Already consumed above; it cannot appear again.
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::RootDir => normalized.push(Component::RootDir),
            Component::Normal(name) => normalized.push(name),
            Component::ParentDir => {
                // Keep the `..` when it stacks on top of another `..`, or
                // when nothing could be popped and the path is relative.
                // The short-circuit ensures `pop` only runs when the result
                // does not already end with `..`.
                let stacked = normalized.ends_with(Component::ParentDir);
                if stacked || (!normalized.pop() && !normalized.has_root()) {
                    normalized.push(Component::ParentDir);
                }
            }
        }
    }

    normalized
}
|
|
|
|
/// Extension trait that renders a [`Path`] as a string usable in an HTML
/// path (forward slashes only).
pub(crate) trait ToUrlPath {
    /// Returns the path as a `String` with every backslash replaced by `/`.
    fn to_url_path(&self) -> String;
}

impl ToUrlPath for Path {
    /// Converts the path to a `/`-separated string.
    ///
    /// # Panics
    ///
    /// Panics if the path is not valid UTF-8.
    fn to_url_path(&self) -> String {
        // All paths handled here are assumed to be UTF-8, hence the unwrap.
        let text = self.to_str().unwrap();
        // Windows paths use `\` as the separator; URLs always use `/`.
        text.chars()
            .map(|ch| if ch == '\\' { '/' } else { ch })
            .collect()
    }
}
|
|
|
|
/// Returns an HTML id that has not been handed out before.
///
/// `used` holds every id returned so far. If `id` is not in it, `id` itself
/// is recorded and returned. Otherwise `id-1`, `id-2`, ... are probed in
/// order and the first one not present in `used` is recorded and returned.
pub(crate) fn unique_id(id: &str, used: &mut HashSet<String>) -> String {
    let mut candidate = id.to_string();
    let mut suffix: u32 = 0;

    // Probe the bare id first, then successive numeric suffixes, until a
    // name that is not yet taken turns up.
    while used.contains(&candidate) {
        suffix += 1;
        candidate = format!("{id}-{suffix}");
    }

    used.insert(candidate.clone());
    candidate
}
|
|
|
|
/// Derives an HTML id from arbitrary heading text.
///
/// The text is trimmed and lowercased; alphanumeric characters, `_` and `-`
/// are kept, each whitespace character becomes `-`, and everything else is
/// dropped.
pub(crate) fn id_from_content(content: &str) -> String {
    // The scheme is deliberately similar to, but not identical with, the
    // heading-id algorithms of other sites and tools (which also differ
    // among themselves). For background see:
    //
    // - https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#section-links
    // - https://docs.gitlab.com/user/markdown/#heading-ids-and-links
    // - https://pandoc.org/MANUAL.html#extension-auto_identifiers
    // - https://kramdown.gettalong.org/converter/html#auto-ids
    // - https://docs.rs/comrak/latest/comrak/options/struct.Extension.html#structfield.header_ids
    let lowered = content.trim().to_lowercase();
    let mut id = String::with_capacity(lowered.len());

    for ch in lowered.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c),
            // Interior whitespace turns into a separator.
            c if c.is_whitespace() => id.push('-'),
            // Punctuation, symbols, emoji, etc. are discarded.
            _ => {}
        }
    }

    id
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeated requests for the same id get increasing numeric suffixes.
    #[test]
    fn it_generates_unique_ids() {
        let mut id_counter = Default::default();

        // (requested id, id expected back), applied in order to one set.
        let steps = [
            ("", ""),
            ("Über", "Über"),
            ("Über", "Über-1"),
            ("Über", "Über-2"),
        ];
        for (requested, expected) in steps {
            assert_eq!(unique_id(requested, &mut id_counter), expected);
        }
    }

    /// Heading text is lowercased, stripped of symbols, and whitespace is
    /// turned into dashes; non-Latin scripts pass through.
    #[test]
    fn it_normalizes_ids() {
        // (input text, expected id)
        let cases = [
            (
                "`--passes`: add more rustdoc passes",
                "--passes-add-more-rustdoc-passes",
            ),
            (
                "Method-call 🐙 expressions \u{1f47c}",
                "method-call--expressions-",
            ),
            ("_-_12345", "_-_12345"),
            ("12345", "12345"),
            ("中文", "中文"),
            ("にほんご", "にほんご"),
            ("한국어", "한국어"),
            ("", ""),
            ("中文標題 CJK title", "中文標題-cjk-title"),
            ("Über", "über"),
        ];
        for (input, expected) in cases {
            assert_eq!(id_from_content(input), expected);
        }
    }
}
|