Add a helper for defining a regex
This adds the `static_regex` macro to help with defining a regex.
This commit is contained in:
parent
8bb9a7ff42
commit
e3bb655663
6 changed files with 50 additions and 49 deletions
|
|
@ -1,11 +1,9 @@
|
|||
//! Various helpers and utilities.
|
||||
|
||||
use anyhow::Error;
|
||||
use regex::Regex;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
use std::sync::LazyLock;
|
||||
use tracing::error;
|
||||
|
||||
pub mod fs;
|
||||
|
|
@ -19,10 +17,23 @@ pub use self::string::{
|
|||
take_rustdoc_include_lines,
|
||||
};
|
||||
|
||||
/// Defines a `static` with a [`regex::Regex`].
///
/// The generated item is a `std::sync::LazyLock`, so the pattern is compiled
/// at most once, the first time the static is dereferenced.
///
/// Two forms are accepted (a trailing comma is allowed in both):
///
/// * `static_regex!(NAME, "pattern")` defines
///   `static NAME: LazyLock<regex::Regex>`.
/// * `static_regex!(NAME, bytes, "pattern")` defines
///   `static NAME: LazyLock<regex::bytes::Regex>`.
///
/// The expansion names `std` and `regex` by plain path, so the `regex` crate
/// must be resolvable under that name at the call site.
///
/// # Examples
///
/// ```ignore
/// static_regex!(WS, r"\s\s+");
/// assert!(WS.is_match("a  b"));
/// ```
///
/// # Panics
///
/// Dereferencing the static panics if the pattern fails to compile. The panic
/// message includes the name of the static so the offending pattern can be
/// located.
#[macro_export]
macro_rules! static_regex {
    ($name:ident, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
            // The pattern is a literal, so a compile failure is a programming
            // error; name the static in the panic to make it easy to find.
            regex::Regex::new($regex).expect(concat!(
                "static_regex!: invalid pattern for `",
                stringify!($name),
                "`"
            ))
        });
    };
    ($name:ident, bytes, $regex:literal $(,)?) => {
        static $name: std::sync::LazyLock<regex::bytes::Regex> = std::sync::LazyLock::new(|| {
            regex::bytes::Regex::new($regex).expect(concat!(
                "static_regex!: invalid pattern for `",
                stringify!($name),
                "`"
            ))
        });
    };
}
|
||||
|
||||
/// Replaces multiple consecutive whitespace characters with a single space character.
|
||||
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s\s+").unwrap());
|
||||
RE.replace_all(text, " ")
|
||||
static_regex!(WS, r"\s\s+");
|
||||
WS.replace_all(text, " ")
|
||||
}
|
||||
|
||||
/// Convert the given string to a valid HTML element ID.
|
||||
|
|
@ -48,7 +59,7 @@ fn id_from_content(content: &str) -> String {
|
|||
let mut content = content.to_string();
|
||||
|
||||
// Skip any tags or html-encoded stuff
|
||||
static HTML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<.*?>)").unwrap());
|
||||
static_regex!(HTML, r"(<.*?>)");
|
||||
content = HTML.replace_all(&content, "").into();
|
||||
const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
|
||||
for sub in REPL_SUB {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use regex::Regex;
|
||||
use crate::static_regex;
|
||||
use std::ops::Bound::{Excluded, Included, Unbounded};
|
||||
use std::ops::RangeBounds;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
/// Take a range of lines from a string.
|
||||
pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
|
||||
|
|
@ -24,10 +23,8 @@ pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
static ANCHOR_START: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap());
|
||||
static ANCHOR_END: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap());
|
||||
static_regex!(ANCHOR_START, r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)");
|
||||
static_regex!(ANCHOR_END, r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)");
|
||||
|
||||
/// Take anchored lines from a string.
|
||||
/// Lines containing anchor are ignored.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use anyhow::Result;
|
||||
use mdbook_core::book::{Book, BookItem};
|
||||
use mdbook_core::static_regex;
|
||||
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
|
||||
use regex::Regex;
|
||||
use std::{path::Path, sync::LazyLock};
|
||||
use std::path::Path;
|
||||
use tracing::warn;
|
||||
|
||||
/// A preprocessor for converting file name `README.md` to `index.md` since
|
||||
|
|
@ -68,9 +68,9 @@ fn warn_readme_name_conflict<P: AsRef<Path>>(readme_path: P, index_path: P) {
|
|||
}
|
||||
|
||||
fn is_readme_file<P: AsRef<Path>>(path: P) -> bool {
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)^readme$").unwrap());
|
||||
static_regex!(README, r"(?i)^readme$");
|
||||
|
||||
RE.is_match(
|
||||
README.is_match(
|
||||
path.as_ref()
|
||||
.file_stem()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
use anyhow::{Context, Result};
|
||||
use mdbook_core::book::{Book, BookItem};
|
||||
use mdbook_core::static_regex;
|
||||
use mdbook_core::utils::{
|
||||
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
|
||||
take_rustdoc_include_lines,
|
||||
};
|
||||
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
|
||||
use regex::{CaptureMatches, Captures, Regex};
|
||||
use regex::{CaptureMatches, Captures};
|
||||
use std::fs;
|
||||
use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::LazyLock;
|
||||
use tracing::{error, warn};
|
||||
|
||||
const ESCAPE_CHAR: char = '\\';
|
||||
|
|
@ -408,10 +408,8 @@ impl<'a> Iterator for LinkIter<'a> {
|
|||
}
|
||||
|
||||
fn find_links(contents: &str) -> LinkIter<'_> {
|
||||
// lazily compute following regex
|
||||
// r"\\\{\{#.*\}\}|\{\{#([a-zA-Z0-9]+)\s*([^}]+)\}\}")?;
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
static_regex!(
|
||||
LINK,
|
||||
r"(?x) # insignificant whitespace mode
|
||||
\\\{\{\#.*\}\} # match escaped link
|
||||
| # or
|
||||
|
|
@ -419,12 +417,10 @@ fn find_links(contents: &str) -> LinkIter<'_> {
|
|||
\#([a-zA-Z0-9_]+) # link type
|
||||
\s+ # separating whitespace
|
||||
([^}]+) # link target path and space separated properties
|
||||
\}\} # link closing parens",
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
\}\} # link closing parens"
|
||||
);
|
||||
|
||||
LinkIter(RE.captures_iter(contents))
|
||||
LinkIter(LINK.captures_iter(contents))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -5,18 +5,17 @@ use anyhow::{Context, Result, bail};
|
|||
use handlebars::Handlebars;
|
||||
use mdbook_core::book::{Book, BookItem, Chapter};
|
||||
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
|
||||
use mdbook_core::utils;
|
||||
use mdbook_core::utils::fs::get_404_output_file;
|
||||
use mdbook_core::{static_regex, utils};
|
||||
use mdbook_markdown::render_markdown;
|
||||
use mdbook_renderer::{RenderContext, Renderer};
|
||||
use regex::{Captures, Regex};
|
||||
use regex::Captures;
|
||||
use serde_json::json;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::LazyLock;
|
||||
use tracing::error;
|
||||
use tracing::{debug, info, trace, warn};
|
||||
|
||||
|
|
@ -702,9 +701,10 @@ fn make_data(
|
|||
/// Goes through the rendered HTML, making sure all header tags have
|
||||
/// an anchor respectively so people can link to sections directly.
|
||||
fn build_header_links(html: &str) -> String {
|
||||
static BUILD_HEADER_LINKS: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#).unwrap()
|
||||
});
|
||||
static_regex!(
|
||||
BUILD_HEADER_LINKS,
|
||||
r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#
|
||||
);
|
||||
static IGNORE_CLASS: &[&str] = &["menu-title", "mdbook-help-title"];
|
||||
|
||||
let mut id_counter = HashMap::new();
|
||||
|
|
@ -758,8 +758,8 @@ fn insert_link_into_header(
|
|||
fn convert_fontawesome(html: &str) -> String {
|
||||
use font_awesome_as_a_crate as fa;
|
||||
|
||||
let regex = Regex::new(r##"<i([^>]+)class="([^"]+)"([^>]*)></i>"##).unwrap();
|
||||
regex
|
||||
static_regex!(FA_RE, r#"<i([^>]+)class="([^"]+)"([^>]*)></i>"#);
|
||||
FA_RE
|
||||
.replace_all(html, |caps: &Captures<'_>| {
|
||||
let text = &caps[0];
|
||||
let before = &caps[1];
|
||||
|
|
@ -811,8 +811,7 @@ fn convert_fontawesome(html: &str) -> String {
|
|||
// ```
|
||||
// This function replaces all commas by spaces in the code block classes
|
||||
fn fix_code_blocks(html: &str) -> String {
|
||||
static FIX_CODE_BLOCKS: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r##"<code([^>]+)class="([^"]+)"([^>]*)>"##).unwrap());
|
||||
static_regex!(FIX_CODE_BLOCKS, r#"<code([^>]+)class="([^"]+)"([^>]*)>"#);
|
||||
|
||||
FIX_CODE_BLOCKS
|
||||
.replace_all(html, |caps: &Captures<'_>| {
|
||||
|
|
@ -825,8 +824,10 @@ fn fix_code_blocks(html: &str) -> String {
|
|||
.into_owned()
|
||||
}
|
||||
|
||||
static CODE_BLOCK_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r##"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"##).unwrap());
|
||||
static_regex!(
|
||||
CODE_BLOCK_RE,
|
||||
r#"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"#
|
||||
);
|
||||
|
||||
fn add_playground_pre(
|
||||
html: &str,
|
||||
|
|
@ -895,10 +896,8 @@ fn add_playground_pre(
|
|||
/// Modifies all `<code>` blocks to convert "hidden" lines and to wrap them in
|
||||
/// a `<span class="boring">`.
|
||||
fn hide_lines(html: &str, code_config: &Code) -> String {
|
||||
static LANGUAGE_REGEX: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"\blanguage-(\w+)\b").unwrap());
|
||||
static HIDELINES_REGEX: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"\bhidelines=(\S+)").unwrap());
|
||||
static_regex!(LANGUAGE_REGEX, r"\blanguage-(\w+)\b");
|
||||
static_regex!(HIDELINES_REGEX, r"\bhidelines=(\S+)");
|
||||
|
||||
CODE_BLOCK_RE
|
||||
.replace_all(html, |caps: &Captures<'_>| {
|
||||
|
|
@ -939,8 +938,7 @@ fn hide_lines(html: &str, code_config: &Code) -> String {
|
|||
}
|
||||
|
||||
fn hide_lines_rust(content: &str) -> String {
|
||||
static BORING_LINES_REGEX: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^(\s*)#(.?)(.*)$").unwrap());
|
||||
static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");
|
||||
|
||||
let mut result = String::with_capacity(content.len());
|
||||
let mut lines = content.lines().peekable();
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@ use super::helpers::resources::ResourceHelper;
|
|||
use crate::theme::{self, Theme, playground_editor};
|
||||
use anyhow::{Context, Result};
|
||||
use mdbook_core::config::HtmlConfig;
|
||||
use mdbook_core::static_regex;
|
||||
use mdbook_core::utils;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::LazyLock;
|
||||
use tracing::debug;
|
||||
|
||||
/// Map static files to their final names and contents.
|
||||
|
|
@ -191,11 +191,10 @@ impl StaticFiles {
|
|||
|
||||
pub(super) fn write_files(self, destination: &Path) -> Result<ResourceHelper> {
|
||||
use mdbook_core::utils::fs::write_file;
|
||||
use regex::bytes::{Captures, Regex};
|
||||
use regex::bytes::Captures;
|
||||
// The `{{ resource "name" }}` directive in static resources look like
|
||||
// handlebars syntax, even if they technically aren't.
|
||||
static RESOURCE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r#"\{\{ resource "([^"]+)" \}\}"#).unwrap());
|
||||
static_regex!(RESOURCE, bytes, r#"\{\{ resource "([^"]+)" \}\}"#);
|
||||
fn replace_all<'a>(
|
||||
hash_map: &HashMap<String, String>,
|
||||
data: &'a [u8],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue