2025-04-20 19:42:45 -07:00
|
|
|
//! Various helpers and utilities.
|
2018-01-21 22:35:11 +08:00
|
|
|
|
2025-07-21 11:37:46 -07:00
|
|
|
use anyhow::Error;
|
2022-06-24 16:50:02 +05:30
|
|
|
use log::error;
|
|
|
|
|
use regex::Regex;
|
2017-05-31 22:28:08 -07:00
|
|
|
use std::borrow::Cow;
|
2022-02-18 15:27:24 +00:00
|
|
|
use std::collections::HashMap;
|
2025-05-11 11:46:01 +02:00
|
|
|
use std::sync::LazyLock;
|
2015-09-16 23:35:16 +02:00
|
|
|
|
2025-07-21 11:37:46 -07:00
|
|
|
pub mod fs;
|
|
|
|
|
mod string;
|
2025-07-25 11:28:52 -07:00
|
|
|
mod toml_ext;
|
|
|
|
|
|
|
|
|
|
pub(crate) use self::toml_ext::TomlExt;
|
2025-07-21 11:37:46 -07:00
|
|
|
|
2019-10-05 18:27:03 -04:00
|
|
|
pub use self::string::{
|
|
|
|
|
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
|
|
|
|
|
take_rustdoc_include_lines,
|
|
|
|
|
};
|
2015-09-16 23:35:16 +02:00
|
|
|
|
2018-03-07 07:02:06 -06:00
|
|
|
/// Replaces multiple consecutive whitespace characters with a single space character.
|
2019-05-07 01:20:58 +07:00
|
|
|
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
|
2025-05-11 11:46:01 +02:00
|
|
|
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s\s+").unwrap());
|
2018-03-07 07:02:06 -06:00
|
|
|
RE.replace_all(text, " ")
|
|
|
|
|
}
|
|
|
|
|
|
2018-09-09 11:52:32 +08:00
|
|
|
/// Turns arbitrary text into a string usable as an HTML element ID.
///
/// HTML's only hard requirement on an ID is that it contains no ASCII
/// whitespace; this function is stricter and maps each character as follows:
///
/// * alphanumeric characters, `_` and `-` are kept, with ASCII letters
///   lower-cased (non-ASCII letters such as `Ü` keep their case);
/// * every whitespace character becomes a single `-`;
/// * anything else (punctuation, symbols, emoji, ...) is dropped.
///
/// Consecutive whitespace is *not* merged, so `"a  b"` yields `"a--b"`.
pub fn normalize_id(content: &str) -> String {
    // The output never needs more bytes than the input: kept characters keep
    // their width and each whitespace character shrinks to a one-byte `-`.
    let mut id = String::with_capacity(content.len());

    for ch in content.chars() {
        match ch {
            c if c.is_alphanumeric() || matches!(c, '_' | '-') => {
                id.push(c.to_ascii_lowercase());
            }
            c if c.is_whitespace() => id.push('-'),
            _ => {}
        }
    }

    id
}
|
|
|
|
|
|
|
|
|
|
/// Generate an ID for use with anchors which is derived from a "normalised"
|
|
|
|
|
/// string.
|
2025-08-12 18:01:45 -07:00
|
|
|
fn id_from_content(content: &str) -> String {
|
2018-03-07 07:02:06 -06:00
|
|
|
let mut content = content.to_string();
|
|
|
|
|
|
|
|
|
|
// Skip any tags or html-encoded stuff
|
2025-05-11 11:46:01 +02:00
|
|
|
static HTML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<.*?>)").unwrap());
|
2021-11-10 00:41:44 +03:00
|
|
|
content = HTML.replace_all(&content, "").into();
|
|
|
|
|
const REPL_SUB: &[&str] = &["<", ">", "&", "'", """];
|
2018-03-07 07:02:06 -06:00
|
|
|
for sub in REPL_SUB {
|
|
|
|
|
content = content.replace(sub, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Remove spaces and hashes indicating a header
|
2019-03-23 08:47:10 -04:00
|
|
|
let trimmed = content.trim().trim_start_matches('#').trim();
|
2018-03-07 07:02:06 -06:00
|
|
|
normalize_id(trimmed)
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-18 15:27:24 +00:00
|
|
|
/// Generate an ID for use with anchors which is derived from a "normalised"
|
|
|
|
|
/// string.
|
|
|
|
|
///
|
|
|
|
|
/// Each ID returned will be unique, if the same `id_counter` is provided on
|
|
|
|
|
/// each call.
|
|
|
|
|
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
|
2025-08-12 18:01:45 -07:00
|
|
|
let id = id_from_content(content);
|
2022-02-18 15:27:24 +00:00
|
|
|
|
|
|
|
|
// If we have headers with the same normalized id, append an incrementing counter
|
|
|
|
|
let id_count = id_counter.entry(id.clone()).or_insert(0);
|
|
|
|
|
let unique_id = match *id_count {
|
|
|
|
|
0 => id,
|
2024-09-21 15:53:59 -07:00
|
|
|
id_count => format!("{id}-{id_count}"),
|
2022-02-18 15:27:24 +00:00
|
|
|
};
|
|
|
|
|
*id_count += 1;
|
|
|
|
|
unique_id
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-07 22:10:48 +08:00
|
|
|
/// Prints a "backtrace" of some `Error`.
|
|
|
|
|
pub fn log_backtrace(e: &Error) {
|
|
|
|
|
error!("Error: {}", e);
|
|
|
|
|
|
2020-05-20 14:32:00 -07:00
|
|
|
for cause in e.chain().skip(1) {
|
2018-01-07 22:10:48 +08:00
|
|
|
error!("\tCaused By: {}", cause);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-07-21 12:20:21 -07:00
|
|
|
/// Escape `<` and `>` for HTML.
|
|
|
|
|
pub fn bracket_escape(mut s: &str) -> String {
|
2022-04-14 20:35:39 -07:00
|
|
|
let mut escaped = String::with_capacity(s.len());
|
|
|
|
|
let needs_escape: &[char] = &['<', '>'];
|
|
|
|
|
while let Some(next) = s.find(needs_escape) {
|
|
|
|
|
escaped.push_str(&s[..next]);
|
|
|
|
|
match s.as_bytes()[next] {
|
|
|
|
|
b'<' => escaped.push_str("<"),
|
|
|
|
|
b'>' => escaped.push_str(">"),
|
|
|
|
|
_ => unreachable!(),
|
|
|
|
|
}
|
|
|
|
|
s = &s[next + 1..];
|
|
|
|
|
}
|
|
|
|
|
escaped.push_str(s);
|
|
|
|
|
escaped
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-31 22:28:08 -07:00
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
2025-07-21 15:46:36 -07:00
|
|
|
use super::bracket_escape;
|
2017-05-31 22:28:08 -07:00
|
|
|
|
2022-02-18 15:27:24 +00:00
|
|
|
#[allow(deprecated)]
|
|
|
|
|
mod id_from_content {
|
|
|
|
|
use super::super::id_from_content;
|
2018-03-07 07:02:06 -06:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn it_generates_anchors() {
|
2018-09-09 11:47:44 +08:00
|
|
|
assert_eq!(
|
|
|
|
|
id_from_content("## Method-call expressions"),
|
|
|
|
|
"method-call-expressions"
|
|
|
|
|
);
|
2018-12-04 00:10:09 +01:00
|
|
|
assert_eq!(id_from_content("## **Bold** title"), "bold-title");
|
|
|
|
|
assert_eq!(id_from_content("## `Code` title"), "code-title");
|
2021-11-10 00:41:44 +03:00
|
|
|
assert_eq!(
|
|
|
|
|
id_from_content("## title <span dir=rtl>foo</span>"),
|
|
|
|
|
"title-foo"
|
|
|
|
|
);
|
2018-09-09 11:47:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn it_generates_anchors_from_non_ascii_initial() {
|
2018-07-23 12:45:01 -05:00
|
|
|
assert_eq!(
|
|
|
|
|
id_from_content("## `--passes`: add more rustdoc passes"),
|
2018-09-09 11:47:44 +08:00
|
|
|
"--passes-add-more-rustdoc-passes"
|
2018-07-23 12:45:01 -05:00
|
|
|
);
|
|
|
|
|
assert_eq!(
|
2018-09-09 11:47:44 +08:00
|
|
|
id_from_content("## 中文標題 CJK title"),
|
|
|
|
|
"中文標題-cjk-title"
|
|
|
|
|
);
|
2018-12-04 00:10:09 +01:00
|
|
|
assert_eq!(id_from_content("## Über"), "Über");
|
2018-03-07 07:02:06 -06:00
|
|
|
}
|
2022-02-18 15:27:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mod html_munging {
|
|
|
|
|
use super::super::{normalize_id, unique_id_from_content};
|
2018-03-07 07:02:06 -06:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn it_normalizes_ids() {
|
2018-07-23 12:45:01 -05:00
|
|
|
assert_eq!(
|
|
|
|
|
normalize_id("`--passes`: add more rustdoc passes"),
|
2018-09-09 11:47:44 +08:00
|
|
|
"--passes-add-more-rustdoc-passes"
|
2018-07-23 12:45:01 -05:00
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
normalize_id("Method-call 🐙 expressions \u{1f47c}"),
|
|
|
|
|
"method-call--expressions-"
|
|
|
|
|
);
|
2018-09-09 11:47:44 +08:00
|
|
|
assert_eq!(normalize_id("_-_12345"), "_-_12345");
|
|
|
|
|
assert_eq!(normalize_id("12345"), "12345");
|
|
|
|
|
assert_eq!(normalize_id("中文"), "中文");
|
|
|
|
|
assert_eq!(normalize_id("にほんご"), "にほんご");
|
|
|
|
|
assert_eq!(normalize_id("한국어"), "한국어");
|
2018-03-07 07:02:06 -06:00
|
|
|
assert_eq!(normalize_id(""), "");
|
|
|
|
|
}
|
2022-02-18 15:27:24 +00:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn it_generates_unique_ids_from_content() {
|
|
|
|
|
// Same id if not given shared state
|
|
|
|
|
assert_eq!(
|
|
|
|
|
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
|
|
|
|
"中文標題-cjk-title"
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
|
|
|
|
"中文標題-cjk-title"
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Different id if given shared state
|
|
|
|
|
let mut id_counter = Default::default();
|
|
|
|
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
|
|
|
|
|
assert_eq!(
|
|
|
|
|
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
|
|
|
|
|
"中文標題-cjk-title"
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
|
|
|
|
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
|
|
|
|
|
}
|
2018-03-07 07:02:06 -06:00
|
|
|
}
|
2022-04-14 20:35:39 -07:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn escaped_brackets() {
|
|
|
|
|
assert_eq!(bracket_escape(""), "");
|
|
|
|
|
assert_eq!(bracket_escape("<"), "<");
|
|
|
|
|
assert_eq!(bracket_escape(">"), ">");
|
|
|
|
|
assert_eq!(bracket_escape("<>"), "<>");
|
|
|
|
|
assert_eq!(bracket_escape("<test>"), "<test>");
|
|
|
|
|
assert_eq!(bracket_escape("a<test>b"), "a<test>b");
|
2024-07-15 18:38:50 -07:00
|
|
|
assert_eq!(bracket_escape("'"), "'");
|
|
|
|
|
assert_eq!(bracket_escape("\\"), "\\");
|
|
|
|
|
}
|
2017-05-31 22:28:08 -07:00
|
|
|
}
|