Merge pull request #2833 from ehuss/static-regex

Add a helper for defining a regex
This commit is contained in:
Eric Huss 2025-09-12 13:57:30 +00:00 committed by GitHub
commit 166a972e9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 50 additions and 49 deletions

View file

@ -1,11 +1,9 @@
//! Various helpers and utilities.
use anyhow::Error;
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::sync::LazyLock;
use tracing::error;
pub mod fs;
@ -19,10 +17,23 @@ pub use self::string::{
take_rustdoc_include_lines,
};
/// Defines a `static` with a [`regex::Regex`].
///
/// The static is a [`std::sync::LazyLock`], so the pattern is compiled the
/// first time the static is dereferenced rather than at program start.
///
/// Two forms are accepted, each with an optional trailing comma:
///
/// * `static_regex!(NAME, r"pattern")` declares
///   `static NAME: LazyLock<regex::Regex>`.
/// * `static_regex!(NAME, bytes, r"pattern")` declares
///   `static NAME: LazyLock<regex::bytes::Regex>`.
///
/// The expansion refers to `regex::Regex` / `regex::bytes::Regex` by path, and
/// those paths are resolved where the macro is invoked, so the invoking crate
/// must have `regex` in scope.
///
/// # Panics
///
/// The first access to the generated static panics if the pattern literal is
/// rejected by `Regex::new`.
#[macro_export]
macro_rules! static_regex {
    // `str`-based regex. `$(,)?` lets callers keep a trailing comma when the
    // invocation is laid out vertically.
    ($name:ident, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
            regex::Regex::new($regex)
                .expect("static_regex!: pattern literal must be a valid regex")
        });
    };
    // Byte-oriented regex, selected by the literal `bytes` marker token.
    ($name:ident, bytes, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::bytes::Regex> = std::sync::LazyLock::new(|| {
            regex::bytes::Regex::new($regex)
                .expect("static_regex!: pattern literal must be a valid regex")
        });
    };
}
/// Replaces multiple consecutive whitespace characters with a single space character.
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s\s+").unwrap());
RE.replace_all(text, " ")
static_regex!(WS, r"\s\s+");
WS.replace_all(text, " ")
}
/// Convert the given string to a valid HTML element ID.
@ -48,7 +59,7 @@ fn id_from_content(content: &str) -> String {
let mut content = content.to_string();
// Skip any tags or html-encoded stuff
static HTML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<.*?>)").unwrap());
static_regex!(HTML, r"(<.*?>)");
content = HTML.replace_all(&content, "").into();
const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
for sub in REPL_SUB {

View file

@ -1,7 +1,6 @@
use regex::Regex;
use crate::static_regex;
use std::ops::Bound::{Excluded, Included, Unbounded};
use std::ops::RangeBounds;
use std::sync::LazyLock;
/// Take a range of lines from a string.
pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
@ -24,10 +23,8 @@ pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
}
}
static ANCHOR_START: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap());
static ANCHOR_END: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap());
static_regex!(ANCHOR_START, r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)");
static_regex!(ANCHOR_END, r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)");
/// Take anchored lines from a string.
/// Lines containing anchor are ignored.

View file

@ -1,8 +1,8 @@
use anyhow::Result;
use mdbook_core::book::{Book, BookItem};
use mdbook_core::static_regex;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::Regex;
use std::{path::Path, sync::LazyLock};
use std::path::Path;
use tracing::warn;
/// A preprocessor for converting file name `README.md` to `index.md` since
@ -68,9 +68,9 @@ fn warn_readme_name_conflict<P: AsRef<Path>>(readme_path: P, index_path: P) {
}
fn is_readme_file<P: AsRef<Path>>(path: P) -> bool {
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)^readme$").unwrap());
static_regex!(README, r"(?i)^readme$");
RE.is_match(
README.is_match(
path.as_ref()
.file_stem()
.and_then(std::ffi::OsStr::to_str)

View file

@ -1,15 +1,15 @@
use anyhow::{Context, Result};
use mdbook_core::book::{Book, BookItem};
use mdbook_core::static_regex;
use mdbook_core::utils::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines,
};
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::{CaptureMatches, Captures, Regex};
use regex::{CaptureMatches, Captures};
use std::fs;
use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo};
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::{error, warn};
const ESCAPE_CHAR: char = '\\';
@ -408,23 +408,19 @@ impl<'a> Iterator for LinkIter<'a> {
}
fn find_links(contents: &str) -> LinkIter<'_> {
// lazily compute following regex
// r"\\\{\{#.*\}\}|\{\{#([a-zA-Z0-9]+)\s*([^}]+)\}\}")?;
static RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?x) # insignificant whitespace mode
static_regex!(
LINK,
r"(?x) # insignificant whitespace mode
\\\{\{\#.*\}\} # match escaped link
| # or
\{\{\s* # link opening parens and whitespace
\#([a-zA-Z0-9_]+) # link type
\s+ # separating whitespace
([^}]+) # link target path and space separated properties
\}\} # link closing parens",
)
.unwrap()
});
\}\} # link closing parens"
);
LinkIter(RE.captures_iter(contents))
LinkIter(LINK.captures_iter(contents))
}
#[cfg(test)]

View file

@ -5,18 +5,17 @@ use anyhow::{Context, Result, bail};
use handlebars::Handlebars;
use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils;
use mdbook_core::utils::fs::get_404_output_file;
use mdbook_core::{static_regex, utils};
use mdbook_markdown::render_markdown;
use mdbook_renderer::{RenderContext, Renderer};
use regex::{Captures, Regex};
use regex::Captures;
use serde_json::json;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::fs::{self, File};
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::error;
use tracing::{debug, info, trace, warn};
@ -702,9 +701,10 @@ fn make_data(
/// Goes through the rendered HTML, making sure all header tags have
/// an anchor respectively so people can link to sections directly.
fn build_header_links(html: &str) -> String {
static BUILD_HEADER_LINKS: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#).unwrap()
});
static_regex!(
BUILD_HEADER_LINKS,
r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#
);
static IGNORE_CLASS: &[&str] = &["menu-title", "mdbook-help-title"];
let mut id_counter = HashMap::new();
@ -758,8 +758,8 @@ fn insert_link_into_header(
fn convert_fontawesome(html: &str) -> String {
use font_awesome_as_a_crate as fa;
let regex = Regex::new(r##"<i([^>]+)class="([^"]+)"([^>]*)></i>"##).unwrap();
regex
static_regex!(FA_RE, r#"<i([^>]+)class="([^"]+)"([^>]*)></i>"#);
FA_RE
.replace_all(html, |caps: &Captures<'_>| {
let text = &caps[0];
let before = &caps[1];
@ -811,8 +811,7 @@ fn convert_fontawesome(html: &str) -> String {
// ```
// This function replaces all commas by spaces in the code block classes
fn fix_code_blocks(html: &str) -> String {
static FIX_CODE_BLOCKS: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r##"<code([^>]+)class="([^"]+)"([^>]*)>"##).unwrap());
static_regex!(FIX_CODE_BLOCKS, r#"<code([^>]+)class="([^"]+)"([^>]*)>"#);
FIX_CODE_BLOCKS
.replace_all(html, |caps: &Captures<'_>| {
@ -825,8 +824,10 @@ fn fix_code_blocks(html: &str) -> String {
.into_owned()
}
static CODE_BLOCK_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r##"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"##).unwrap());
static_regex!(
CODE_BLOCK_RE,
r#"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"#
);
fn add_playground_pre(
html: &str,
@ -895,10 +896,8 @@ fn add_playground_pre(
/// Modifies all `<code>` blocks to convert "hidden" lines and to wrap them in
/// a `<span class="boring">`.
fn hide_lines(html: &str, code_config: &Code) -> String {
static LANGUAGE_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\blanguage-(\w+)\b").unwrap());
static HIDELINES_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\bhidelines=(\S+)").unwrap());
static_regex!(LANGUAGE_REGEX, r"\blanguage-(\w+)\b");
static_regex!(HIDELINES_REGEX, r"\bhidelines=(\S+)");
CODE_BLOCK_RE
.replace_all(html, |caps: &Captures<'_>| {
@ -939,8 +938,7 @@ fn hide_lines(html: &str, code_config: &Code) -> String {
}
fn hide_lines_rust(content: &str) -> String {
static BORING_LINES_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(\s*)#(.?)(.*)$").unwrap());
static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");
let mut result = String::with_capacity(content.len());
let mut lines = content.lines().peekable();

View file

@ -4,12 +4,12 @@ use super::helpers::resources::ResourceHelper;
use crate::theme::{self, Theme, playground_editor};
use anyhow::{Context, Result};
use mdbook_core::config::HtmlConfig;
use mdbook_core::static_regex;
use mdbook_core::utils;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fs::{self, File};
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::debug;
/// Map static files to their final names and contents.
@ -191,11 +191,10 @@ impl StaticFiles {
pub(super) fn write_files(self, destination: &Path) -> Result<ResourceHelper> {
use mdbook_core::utils::fs::write_file;
use regex::bytes::{Captures, Regex};
use regex::bytes::Captures;
// The `{{ resource "name" }}` directive in static resources look like
// handlebars syntax, even if they technically aren't.
static RESOURCE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\{\{ resource "([^"]+)" \}\}"#).unwrap());
static_regex!(RESOURCE, bytes, r#"\{\{ resource "([^"]+)" \}\}"#);
fn replace_all<'a>(
hash_map: &HashMap<String, String>,
data: &'a [u8],