Merge pull request #2833 from ehuss/static-regex

Add a helper for defining a regex
This commit is contained in:
Eric Huss 2025-09-12 13:57:30 +00:00 committed by GitHub
commit 166a972e9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 50 additions and 49 deletions

View file

@ -1,11 +1,9 @@
//! Various helpers and utilities. //! Various helpers and utilities.
use anyhow::Error; use anyhow::Error;
use regex::Regex;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Write; use std::fmt::Write;
use std::sync::LazyLock;
use tracing::error; use tracing::error;
pub mod fs; pub mod fs;
@ -19,10 +17,23 @@ pub use self::string::{
take_rustdoc_include_lines, take_rustdoc_include_lines,
}; };
/// Defines a `static` holding a lazily compiled [`regex::Regex`].
///
/// Two forms are accepted, and a trailing comma is permitted in both:
///
/// * `static_regex!(NAME, r"pattern")` expands to
///   `static NAME: std::sync::LazyLock<regex::Regex>`.
/// * `static_regex!(NAME, bytes, r"pattern")` expands to
///   `static NAME: std::sync::LazyLock<regex::bytes::Regex>`.
///
/// The pattern must be a literal. It is compiled inside the
/// [`std::sync::LazyLock`] initializer, i.e. on first access to the static
/// rather than at program start.
///
/// The expansion names `regex::…` and `std::sync::LazyLock` by path, so the
/// invoking crate must be able to resolve the `regex` crate itself.
///
/// # Panics
///
/// The first access to the generated static panics if the pattern is not a
/// valid regular expression. The panic message names the static.
#[macro_export]
macro_rules! static_regex {
    // `&str` flavor.
    ($name:ident, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
            // A bad literal pattern is a programming error, so panicking is
            // intended; the message points at the offending static.
            regex::Regex::new($regex).expect(concat!(
                "invalid regex in static_regex!(",
                stringify!($name),
                ")"
            ))
        });
    };
    // `&[u8]` flavor, selected with the `bytes` keyword.
    ($name:ident, bytes, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::bytes::Regex> =
            std::sync::LazyLock::new(|| {
                regex::bytes::Regex::new($regex).expect(concat!(
                    "invalid regex in static_regex!(",
                    stringify!($name),
                    ")"
                ))
            });
    };
}
/// Replaces multiple consecutive whitespace characters with a single space character. /// Replaces multiple consecutive whitespace characters with a single space character.
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> { pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s\s+").unwrap()); static_regex!(WS, r"\s\s+");
RE.replace_all(text, " ") WS.replace_all(text, " ")
} }
/// Convert the given string to a valid HTML element ID. /// Convert the given string to a valid HTML element ID.
@ -48,7 +59,7 @@ fn id_from_content(content: &str) -> String {
let mut content = content.to_string(); let mut content = content.to_string();
// Skip any tags or html-encoded stuff // Skip any tags or html-encoded stuff
static HTML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<.*?>)").unwrap()); static_regex!(HTML, r"(<.*?>)");
content = HTML.replace_all(&content, "").into(); content = HTML.replace_all(&content, "").into();
const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"]; const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
for sub in REPL_SUB { for sub in REPL_SUB {

View file

@ -1,7 +1,6 @@
use regex::Regex; use crate::static_regex;
use std::ops::Bound::{Excluded, Included, Unbounded}; use std::ops::Bound::{Excluded, Included, Unbounded};
use std::ops::RangeBounds; use std::ops::RangeBounds;
use std::sync::LazyLock;
/// Take a range of lines from a string. /// Take a range of lines from a string.
pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String { pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
@ -24,10 +23,8 @@ pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
} }
} }
static ANCHOR_START: LazyLock<Regex> = static_regex!(ANCHOR_START, r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)");
LazyLock::new(|| Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap()); static_regex!(ANCHOR_END, r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)");
static ANCHOR_END: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap());
/// Take anchored lines from a string. /// Take anchored lines from a string.
/// Lines containing anchor are ignored. /// Lines containing anchor are ignored.

View file

@ -1,8 +1,8 @@
use anyhow::Result; use anyhow::Result;
use mdbook_core::book::{Book, BookItem}; use mdbook_core::book::{Book, BookItem};
use mdbook_core::static_regex;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::Regex; use std::path::Path;
use std::{path::Path, sync::LazyLock};
use tracing::warn; use tracing::warn;
/// A preprocessor for converting file name `README.md` to `index.md` since /// A preprocessor for converting file name `README.md` to `index.md` since
@ -68,9 +68,9 @@ fn warn_readme_name_conflict<P: AsRef<Path>>(readme_path: P, index_path: P) {
} }
fn is_readme_file<P: AsRef<Path>>(path: P) -> bool { fn is_readme_file<P: AsRef<Path>>(path: P) -> bool {
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)^readme$").unwrap()); static_regex!(README, r"(?i)^readme$");
RE.is_match( README.is_match(
path.as_ref() path.as_ref()
.file_stem() .file_stem()
.and_then(std::ffi::OsStr::to_str) .and_then(std::ffi::OsStr::to_str)

View file

@ -1,15 +1,15 @@
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use mdbook_core::book::{Book, BookItem}; use mdbook_core::book::{Book, BookItem};
use mdbook_core::static_regex;
use mdbook_core::utils::{ use mdbook_core::utils::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines, take_rustdoc_include_lines,
}; };
use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::{CaptureMatches, Captures, Regex}; use regex::{CaptureMatches, Captures};
use std::fs; use std::fs;
use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo}; use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::{error, warn}; use tracing::{error, warn};
const ESCAPE_CHAR: char = '\\'; const ESCAPE_CHAR: char = '\\';
@ -408,23 +408,19 @@ impl<'a> Iterator for LinkIter<'a> {
} }
fn find_links(contents: &str) -> LinkIter<'_> { fn find_links(contents: &str) -> LinkIter<'_> {
// lazily compute following regex static_regex!(
// r"\\\{\{#.*\}\}|\{\{#([a-zA-Z0-9]+)\s*([^}]+)\}\}")?; LINK,
static RE: LazyLock<Regex> = LazyLock::new(|| { r"(?x) # insignificant whitespace mode
Regex::new(
r"(?x) # insignificant whitespace mode
\\\{\{\#.*\}\} # match escaped link \\\{\{\#.*\}\} # match escaped link
| # or | # or
\{\{\s* # link opening parens and whitespace \{\{\s* # link opening parens and whitespace
\#([a-zA-Z0-9_]+) # link type \#([a-zA-Z0-9_]+) # link type
\s+ # separating whitespace \s+ # separating whitespace
([^}]+) # link target path and space separated properties ([^}]+) # link target path and space separated properties
\}\} # link closing parens", \}\} # link closing parens"
) );
.unwrap()
});
LinkIter(RE.captures_iter(contents)) LinkIter(LINK.captures_iter(contents))
} }
#[cfg(test)] #[cfg(test)]

View file

@ -5,18 +5,17 @@ use anyhow::{Context, Result, bail};
use handlebars::Handlebars; use handlebars::Handlebars;
use mdbook_core::book::{Book, BookItem, Chapter}; use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition}; use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils;
use mdbook_core::utils::fs::get_404_output_file; use mdbook_core::utils::fs::get_404_output_file;
use mdbook_core::{static_regex, utils};
use mdbook_markdown::render_markdown; use mdbook_markdown::render_markdown;
use mdbook_renderer::{RenderContext, Renderer}; use mdbook_renderer::{RenderContext, Renderer};
use regex::{Captures, Regex}; use regex::Captures;
use serde_json::json; use serde_json::json;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::{self, File}; use std::fs::{self, File};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::error; use tracing::error;
use tracing::{debug, info, trace, warn}; use tracing::{debug, info, trace, warn};
@ -702,9 +701,10 @@ fn make_data(
/// Goes through the rendered HTML, making sure all header tags have /// Goes through the rendered HTML, making sure all header tags have
/// an anchor respectively so people can link to sections directly. /// an anchor respectively so people can link to sections directly.
fn build_header_links(html: &str) -> String { fn build_header_links(html: &str) -> String {
static BUILD_HEADER_LINKS: LazyLock<Regex> = LazyLock::new(|| { static_regex!(
Regex::new(r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#).unwrap() BUILD_HEADER_LINKS,
}); r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#
);
static IGNORE_CLASS: &[&str] = &["menu-title", "mdbook-help-title"]; static IGNORE_CLASS: &[&str] = &["menu-title", "mdbook-help-title"];
let mut id_counter = HashMap::new(); let mut id_counter = HashMap::new();
@ -758,8 +758,8 @@ fn insert_link_into_header(
fn convert_fontawesome(html: &str) -> String { fn convert_fontawesome(html: &str) -> String {
use font_awesome_as_a_crate as fa; use font_awesome_as_a_crate as fa;
let regex = Regex::new(r##"<i([^>]+)class="([^"]+)"([^>]*)></i>"##).unwrap(); static_regex!(FA_RE, r#"<i([^>]+)class="([^"]+)"([^>]*)></i>"#);
regex FA_RE
.replace_all(html, |caps: &Captures<'_>| { .replace_all(html, |caps: &Captures<'_>| {
let text = &caps[0]; let text = &caps[0];
let before = &caps[1]; let before = &caps[1];
@ -811,8 +811,7 @@ fn convert_fontawesome(html: &str) -> String {
// ``` // ```
// This function replaces all commas by spaces in the code block classes // This function replaces all commas by spaces in the code block classes
fn fix_code_blocks(html: &str) -> String { fn fix_code_blocks(html: &str) -> String {
static FIX_CODE_BLOCKS: LazyLock<Regex> = static_regex!(FIX_CODE_BLOCKS, r#"<code([^>]+)class="([^"]+)"([^>]*)>"#);
LazyLock::new(|| Regex::new(r##"<code([^>]+)class="([^"]+)"([^>]*)>"##).unwrap());
FIX_CODE_BLOCKS FIX_CODE_BLOCKS
.replace_all(html, |caps: &Captures<'_>| { .replace_all(html, |caps: &Captures<'_>| {
@ -825,8 +824,10 @@ fn fix_code_blocks(html: &str) -> String {
.into_owned() .into_owned()
} }
static CODE_BLOCK_RE: LazyLock<Regex> = static_regex!(
LazyLock::new(|| Regex::new(r##"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"##).unwrap()); CODE_BLOCK_RE,
r#"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"#
);
fn add_playground_pre( fn add_playground_pre(
html: &str, html: &str,
@ -895,10 +896,8 @@ fn add_playground_pre(
/// Modifies all `<code>` blocks to convert "hidden" lines and to wrap them in /// Modifies all `<code>` blocks to convert "hidden" lines and to wrap them in
/// a `<span class="boring">`. /// a `<span class="boring">`.
fn hide_lines(html: &str, code_config: &Code) -> String { fn hide_lines(html: &str, code_config: &Code) -> String {
static LANGUAGE_REGEX: LazyLock<Regex> = static_regex!(LANGUAGE_REGEX, r"\blanguage-(\w+)\b");
LazyLock::new(|| Regex::new(r"\blanguage-(\w+)\b").unwrap()); static_regex!(HIDELINES_REGEX, r"\bhidelines=(\S+)");
static HIDELINES_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\bhidelines=(\S+)").unwrap());
CODE_BLOCK_RE CODE_BLOCK_RE
.replace_all(html, |caps: &Captures<'_>| { .replace_all(html, |caps: &Captures<'_>| {
@ -939,8 +938,7 @@ fn hide_lines(html: &str, code_config: &Code) -> String {
} }
fn hide_lines_rust(content: &str) -> String { fn hide_lines_rust(content: &str) -> String {
static BORING_LINES_REGEX: LazyLock<Regex> = static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");
LazyLock::new(|| Regex::new(r"^(\s*)#(.?)(.*)$").unwrap());
let mut result = String::with_capacity(content.len()); let mut result = String::with_capacity(content.len());
let mut lines = content.lines().peekable(); let mut lines = content.lines().peekable();

View file

@ -4,12 +4,12 @@ use super::helpers::resources::ResourceHelper;
use crate::theme::{self, Theme, playground_editor}; use crate::theme::{self, Theme, playground_editor};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use mdbook_core::config::HtmlConfig; use mdbook_core::config::HtmlConfig;
use mdbook_core::static_regex;
use mdbook_core::utils; use mdbook_core::utils;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::{self, File}; use std::fs::{self, File};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::debug; use tracing::debug;
/// Map static files to their final names and contents. /// Map static files to their final names and contents.
@ -191,11 +191,10 @@ impl StaticFiles {
pub(super) fn write_files(self, destination: &Path) -> Result<ResourceHelper> { pub(super) fn write_files(self, destination: &Path) -> Result<ResourceHelper> {
use mdbook_core::utils::fs::write_file; use mdbook_core::utils::fs::write_file;
use regex::bytes::{Captures, Regex}; use regex::bytes::Captures;
// The `{{ resource "name" }}` directive in static resources look like // The `{{ resource "name" }}` directive in static resources look like
// handlebars syntax, even if they technically aren't. // handlebars syntax, even if they technically aren't.
static RESOURCE: LazyLock<Regex> = static_regex!(RESOURCE, bytes, r#"\{\{ resource "([^"]+)" \}\}"#);
LazyLock::new(|| Regex::new(r#"\{\{ resource "([^"]+)" \}\}"#).unwrap());
fn replace_all<'a>( fn replace_all<'a>(
hash_map: &HashMap<String, String>, hash_map: &HashMap<String, String>,
data: &'a [u8], data: &'a [u8],