Move preprocessor types to mdbook-preprocessor

This sets up mdbook-preprocessor, which is intended to be the core
library that preprocessors use to implement the necessary interactions.
This commit is contained in:
Eric Huss 2025-07-21 15:19:18 -07:00
parent e123879c8c
commit 12285f505d
13 changed files with 111 additions and 99 deletions

7
Cargo.lock generated
View file

@ -1271,6 +1271,7 @@ dependencies = [
"ignore", "ignore",
"log", "log",
"mdbook-core", "mdbook-core",
"mdbook-preprocessor",
"mdbook-summary", "mdbook-summary",
"memchr", "memchr",
"notify", "notify",
@ -1313,15 +1314,17 @@ dependencies = [
name = "mdbook-preprocessor" name = "mdbook-preprocessor"
version = "0.5.0-alpha.1" version = "0.5.0-alpha.1"
dependencies = [ dependencies = [
"anyhow",
"mdbook-core", "mdbook-core",
"serde",
"serde_json",
] ]
[[package]] [[package]]
name = "mdbook-remove-emphasis" name = "mdbook-remove-emphasis"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "mdbook-preprocessor",
"mdbook",
"pulldown-cmark 0.12.2", "pulldown-cmark 0.12.2",
"pulldown-cmark-to-cmark", "pulldown-cmark-to-cmark",
"serde_json", "serde_json",

View file

@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow
anyhow = "1.0.98" anyhow = "1.0.98"
log = "0.4.27" log = "0.4.27"
mdbook-core = { path = "crates/mdbook-core" } mdbook-core = { path = "crates/mdbook-core" }
mdbook-preprocessor = { path = "crates/mdbook-preprocessor" }
mdbook-summary = { path = "crates/mdbook-summary" } mdbook-summary = { path = "crates/mdbook-summary" }
memchr = "2.7.5" memchr = "2.7.5"
pulldown-cmark = { version = "0.10.3", default-features = false, features = ["html"] } # Do not update, part of the public api. pulldown-cmark = { version = "0.10.3", default-features = false, features = ["html"] } # Do not update, part of the public api.
@ -61,6 +62,7 @@ handlebars = "6.0"
hex = "0.4.3" hex = "0.4.3"
log.workspace = true log.workspace = true
mdbook-core.workspace = true mdbook-core.workspace = true
mdbook-preprocessor.workspace = true
mdbook-summary.workspace = true mdbook-summary.workspace = true
memchr.workspace = true memchr.workspace = true
opener = "0.8.1" opener = "0.8.1"

View file

@ -8,7 +8,10 @@ repository.workspace = true
rust-version.workspace = true rust-version.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true
mdbook-core.workspace = true mdbook-core.workspace = true
serde.workspace = true
serde_json.workspace = true
[lints] [lints]
workspace = true workspace = true

View file

@ -1,3 +1,76 @@
//! Library to assist implementing an mdbook preprocessor. //! Library to assist implementing an mdbook preprocessor.
use anyhow::Context;
use mdbook_core::book::Book;
use mdbook_core::config::Config;
use mdbook_core::errors::Result;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::HashMap;
use std::io::Read;
use std::path::PathBuf;
pub use mdbook_core::MDBOOK_VERSION; pub use mdbook_core::MDBOOK_VERSION;
pub use mdbook_core::book;
pub use mdbook_core::config;
pub use mdbook_core::errors;
/// An operation which is run immediately after loading a book into memory and
/// before it gets rendered.
///
/// Implementors must provide `name` and `run`; `supports_renderer` has a
/// default implementation.
pub trait Preprocessor {
/// Get the `Preprocessor`'s name.
fn name(&self) -> &str;
/// Run this `Preprocessor`, allowing it to update the book before it is
/// given to a renderer.
///
/// The `book` is passed by value and the (possibly modified) book is
/// returned on success. `ctx` carries the book root, the configuration,
/// the renderer name and the calling `mdbook` version.
///
/// # Errors
///
/// Implementations return an error to signal that preprocessing failed.
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book>;
/// A hint to `MDBook` whether this preprocessor is compatible with a
/// particular renderer.
///
/// By default, always returns `true`.
fn supports_renderer(&self, _renderer: &str) -> bool {
// Default: no restriction; the renderer name is ignored.
true
}
}
/// Extra information for a `Preprocessor` to give them more context when
/// processing a book.
///
/// The type derives `Serialize`/`Deserialize`; fields marked `#[serde(skip)]`
/// are not part of the serialized form.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PreprocessorContext {
/// The location of the book directory on disk.
pub root: PathBuf,
/// The book configuration (`book.toml`).
pub config: Config,
/// The `Renderer` this preprocessor is being used with.
pub renderer: String,
/// The calling `mdbook` version.
pub mdbook_version: String,
/// Internal mapping of chapter titles.
///
/// This is used internally by mdbook to compute custom chapter titles.
/// This should not be used outside of mdbook's internals.
///
/// Wrapped in a `RefCell` so the map can be updated through the shared
/// `&PreprocessorContext` that `Preprocessor::run` receives.
// NOTE(review): the `PathBuf` keys are presumably chapter paths; confirm against the code that fills this map.
#[serde(skip)]
pub chapter_titles: RefCell<HashMap<PathBuf, String>>,
// Private unit field: it keeps code outside this module from building the
// struct with a literal, so `PreprocessorContext::new` must be used.
#[serde(skip)]
__non_exhaustive: (),
}
impl PreprocessorContext {
    /// Builds a fresh `PreprocessorContext` for the given book root,
    /// configuration and renderer name.
    ///
    /// `mdbook_version` is taken from `crate::MDBOOK_VERSION`, and the
    /// chapter title map starts out empty.
    pub fn new(root: PathBuf, config: Config, renderer: String) -> Self {
        let mdbook_version = crate::MDBOOK_VERSION.to_string();
        let chapter_titles = RefCell::new(HashMap::new());

        Self {
            root,
            config,
            renderer,
            mdbook_version,
            chapter_titles,
            __non_exhaustive: (),
        }
    }
}
/// Parses the input given to a preprocessor.
///
/// Reads JSON from `reader` and deserializes it into a
/// `(PreprocessorContext, Book)` pair.
///
/// # Errors
///
/// Returns the deserialization error wrapped with the context message
/// "Unable to parse the input" if `reader` does not yield a valid pair.
pub fn parse_input<R: Read>(reader: R) -> Result<(PreprocessorContext, Book)> {
    let parsed: serde_json::Result<(PreprocessorContext, Book)> =
        serde_json::from_reader(reader);
    parsed.context("Unable to parse the input")
}

View file

@ -1,10 +1,10 @@
//! A basic example of a preprocessor that does nothing. //! A basic example of a preprocessor that does nothing.
use crate::nop_lib::Nop; use crate::nop_lib::Nop;
use anyhow::Error;
use clap::{Arg, ArgMatches, Command}; use clap::{Arg, ArgMatches, Command};
use mdbook::book::Book; use mdbook_preprocessor::book::Book;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; use mdbook_preprocessor::errors::Result;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use semver::{Version, VersionReq}; use semver::{Version, VersionReq};
use std::io; use std::io;
use std::process; use std::process;
@ -33,8 +33,8 @@ fn main() {
} }
} }
fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> { fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<()> {
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?; let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?;
let book_version = Version::parse(&ctx.mdbook_version)?; let book_version = Version::parse(&ctx.mdbook_version)?;
let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?; let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?;
@ -88,7 +88,7 @@ mod nop_lib {
"nop-preprocessor" "nop-preprocessor"
} }
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book, Error> { fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book> {
// In testing we want to tell the preprocessor to blow up by setting a // In testing we want to tell the preprocessor to blow up by setting a
// particular config value // particular config value
if let Some(nop_cfg) = ctx.config.get_preprocessor(self.name()) { if let Some(nop_cfg) = ctx.config.get_preprocessor(self.name()) {
@ -149,7 +149,7 @@ mod nop_lib {
]"##; ]"##;
let input_json = input_json.as_bytes(); let input_json = input_json.as_bytes();
let (ctx, book) = mdbook::preprocess::CmdPreprocessor::parse_input(input_json).unwrap(); let (ctx, book) = mdbook_preprocessor::parse_input(input_json).unwrap();
let expected_book = book.clone(); let expected_book = book.clone();
let result = Nop::new().run(&ctx, book); let result = Nop::new().run(&ctx, book);
assert!(result.is_ok()); assert!(result.is_ok());

View file

@ -4,8 +4,7 @@ version = "0.1.0"
edition.workspace = true edition.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true mdbook-preprocessor.workspace = true
mdbook = { path = "../../.." }
pulldown-cmark = { version = "0.12.2", default-features = false } pulldown-cmark = { version = "0.12.2", default-features = false }
pulldown-cmark-to-cmark = "18.0.0" pulldown-cmark-to-cmark = "18.0.0"
serde_json = "1.0.132" serde_json = "1.0.132"

View file

@ -1,10 +1,9 @@
//! This is a demonstration of an mdBook preprocessor which parses markdown //! This is a demonstration of an mdBook preprocessor which parses markdown
//! and removes any instances of emphasis. //! and removes any instances of emphasis.
use anyhow::Error; use mdbook_preprocessor::book::{Book, BookItem, Chapter};
use mdbook::BookItem; use mdbook_preprocessor::errors::Result;
use mdbook::book::{Book, Chapter}; use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use pulldown_cmark::{Event, Parser, Tag, TagEnd}; use pulldown_cmark::{Event, Parser, Tag, TagEnd};
use std::io; use std::io;
@ -35,7 +34,7 @@ impl Preprocessor for RemoveEmphasis {
"remove-emphasis" "remove-emphasis"
} }
fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> { fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
let mut total = 0; let mut total = 0;
book.for_each_mut(|item| { book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else { let BookItem::Chapter(ch) = item else {
@ -55,7 +54,7 @@ impl Preprocessor for RemoveEmphasis {
} }
// ANCHOR: remove_emphasis // ANCHOR: remove_emphasis
fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result<String, Error> { fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result<String> {
let mut buf = String::with_capacity(chapter.content.len()); let mut buf = String::with_capacity(chapter.content.len());
let events = Parser::new(&chapter.content).filter(|e| match e { let events = Parser::new(&chapter.content).filter(|e| match e {
@ -71,9 +70,9 @@ fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Resu
} }
// ANCHOR_END: remove_emphasis // ANCHOR_END: remove_emphasis
pub fn handle_preprocessing() -> Result<(), Error> { pub fn handle_preprocessing() -> Result<()> {
let pre = RemoveEmphasis; let pre = RemoveEmphasis;
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?; let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?;
let processed_book = pre.run(&ctx, book)?; let processed_book = pre.run(&ctx, book)?;
serde_json::to_writer(io::stdout(), &processed_book)?; serde_json::to_writer(io::stdout(), &processed_book)?;

View file

@ -15,6 +15,7 @@ use log::{debug, error, info, log_enabled, trace, warn};
pub use mdbook_core::book::{Book, BookItem, BookItems, Chapter, SectionNumber}; pub use mdbook_core::book::{Book, BookItem, BookItems, Chapter, SectionNumber};
use mdbook_core::config::{Config, RustEdition}; use mdbook_core::config::{Config, RustEdition};
use mdbook_core::utils; use mdbook_core::utils;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
pub use mdbook_summary::{Link, Summary, SummaryItem, parse_summary}; pub use mdbook_summary::{Link, Summary, SummaryItem, parse_summary};
use std::ffi::OsString; use std::ffi::OsString;
use std::io::{IsTerminal, Write}; use std::io::{IsTerminal, Write};
@ -24,9 +25,7 @@ use tempfile::Builder as TempFileBuilder;
use toml::Value; use toml::Value;
use topological_sort::TopologicalSort; use topological_sort::TopologicalSort;
use crate::preprocess::{ use crate::preprocess::{CmdPreprocessor, IndexPreprocessor, LinkPreprocessor};
CmdPreprocessor, IndexPreprocessor, LinkPreprocessor, Preprocessor, PreprocessorContext,
};
use crate::renderer::{CmdRenderer, HtmlHandlebars, MarkdownRenderer, RenderContext, Renderer}; use crate::renderer::{CmdRenderer, HtmlHandlebars, MarkdownRenderer, RenderContext, Renderer};
/// The object used to manage and build a book. /// The object used to manage and build a book.

View file

@ -1,9 +1,9 @@
use super::{Preprocessor, PreprocessorContext};
use crate::book::Book; use crate::book::Book;
use anyhow::{Context, Result, bail, ensure}; use anyhow::{Context, Result, bail, ensure};
use log::{debug, trace, warn}; use log::{debug, trace, warn};
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use shlex::Shlex; use shlex::Shlex;
use std::io::{self, Read, Write}; use std::io::{self, Write};
use std::process::{Child, Command, Stdio}; use std::process::{Child, Command, Stdio};
/// A custom preprocessor which will shell out to a 3rd-party program. /// A custom preprocessor which will shell out to a 3rd-party program.
@ -41,12 +41,6 @@ impl CmdPreprocessor {
CmdPreprocessor { name, cmd } CmdPreprocessor { name, cmd }
} }
/// A convenience function custom preprocessors can use to parse the input
/// that `CmdPreprocessor` writes to the child process's `stdin`.
///
/// Deserializes a JSON `(PreprocessorContext, Book)` pair from `reader`;
/// on failure the error carries the context "Unable to parse the input".
pub fn parse_input<R: Read>(reader: R) -> Result<(PreprocessorContext, Book)> {
    let parsed: serde_json::Result<(PreprocessorContext, Book)> =
        serde_json::from_reader(reader);
    parsed.context("Unable to parse the input")
}
fn write_input_to_child(&self, child: &mut Child, book: &Book, ctx: &PreprocessorContext) { fn write_input_to_child(&self, child: &mut Child, book: &Book, ctx: &PreprocessorContext) {
let stdin = child.stdin.take().expect("Child has stdin"); let stdin = child.stdin.take().expect("Child has stdin");
@ -200,7 +194,7 @@ mod tests {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
cmd.write_input(&mut buffer, &md.book, &ctx).unwrap(); cmd.write_input(&mut buffer, &md.book, &ctx).unwrap();
let (got_ctx, got_book) = CmdPreprocessor::parse_input(buffer.as_slice()).unwrap(); let (got_ctx, got_book) = mdbook_preprocessor::parse_input(buffer.as_slice()).unwrap();
assert_eq!(got_book, md.book); assert_eq!(got_book, md.book);
assert_eq!(got_ctx, ctx); assert_eq!(got_ctx, ctx);

View file

@ -1,10 +1,9 @@
use regex::Regex;
use std::{path::Path, sync::LazyLock};
use super::{Preprocessor, PreprocessorContext};
use crate::book::{Book, BookItem}; use crate::book::{Book, BookItem};
use anyhow::Result; use anyhow::Result;
use log::warn; use log::warn;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::Regex;
use std::{path::Path, sync::LazyLock};
/// A preprocessor for converting file name `README.md` to `index.md` since /// A preprocessor for converting file name `README.md` to `index.md` since
/// `README.md` is the de facto index file in markdown-based documentation. /// `README.md` is the de facto index file in markdown-based documentation.

View file

@ -1,18 +1,17 @@
use crate::book::{Book, BookItem};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use log::{error, warn};
use mdbook_core::utils::{ use mdbook_core::utils::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines, take_rustdoc_include_lines,
}; };
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::{CaptureMatches, Captures, Regex}; use regex::{CaptureMatches, Captures, Regex};
use std::fs; use std::fs;
use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo}; use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::LazyLock; use std::sync::LazyLock;
use super::{Preprocessor, PreprocessorContext};
use crate::book::{Book, BookItem};
use log::{error, warn};
const ESCAPE_CHAR: char = '\\'; const ESCAPE_CHAR: char = '\\';
const MAX_LINK_NESTED_DEPTH: usize = 10; const MAX_LINK_NESTED_DEPTH: usize = 10;

View file

@ -1,13 +1,5 @@
//! Book preprocessing. //! Book preprocessing.
use crate::book::Book;
use anyhow::Result;
use mdbook_core::config::Config;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::HashMap;
use std::path::PathBuf;
pub use self::cmd::CmdPreprocessor; pub use self::cmd::CmdPreprocessor;
pub use self::index::IndexPreprocessor; pub use self::index::IndexPreprocessor;
pub use self::links::LinkPreprocessor; pub use self::links::LinkPreprocessor;
@ -15,54 +7,3 @@ pub use self::links::LinkPreprocessor;
mod cmd; mod cmd;
mod index; mod index;
mod links; mod links;
/// Extra information for a `Preprocessor` to give them more context when
/// processing a book.
///
/// The type derives `Serialize`/`Deserialize`; fields marked `#[serde(skip)]`
/// are not part of the serialized form.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PreprocessorContext {
/// The location of the book directory on disk.
pub root: PathBuf,
/// The book configuration (`book.toml`).
pub config: Config,
/// The `Renderer` this preprocessor is being used with.
pub renderer: String,
/// The calling `mdbook` version.
pub mdbook_version: String,
// Crate-internal map from a path to a title string. The `RefCell` allows
// it to be updated through a shared `&PreprocessorContext`.
// NOTE(review): the keys are presumably chapter paths; confirm against the code that fills this map.
#[serde(skip)]
pub(crate) chapter_titles: RefCell<HashMap<PathBuf, String>>,
// Private unit field: it keeps code outside this module from building the
// struct with a literal, so `PreprocessorContext::new` must be used.
#[serde(skip)]
__non_exhaustive: (),
}
impl PreprocessorContext {
    /// Builds a fresh `PreprocessorContext` for the given book root,
    /// configuration and renderer name.
    ///
    /// `mdbook_version` is taken from `crate::MDBOOK_VERSION`, and the
    /// chapter title map starts out empty.
    pub(crate) fn new(root: PathBuf, config: Config, renderer: String) -> Self {
        let mdbook_version = crate::MDBOOK_VERSION.to_string();
        let chapter_titles = RefCell::new(HashMap::new());

        Self {
            root,
            config,
            renderer,
            mdbook_version,
            chapter_titles,
            __non_exhaustive: (),
        }
    }
}
/// An operation which is run immediately after loading a book into memory and
/// before it gets rendered.
///
/// Implementors must provide `name` and `run`; `supports_renderer` has a
/// default implementation.
pub trait Preprocessor {
/// Get the `Preprocessor`'s name.
fn name(&self) -> &str;
/// Run this `Preprocessor`, allowing it to update the book before it is
/// given to a renderer.
///
/// The `book` is passed by value and the (possibly modified) book is
/// returned on success. `ctx` carries the book root, the configuration,
/// the renderer name and the calling `mdbook` version.
///
/// # Errors
///
/// Implementations return an error to signal that preprocessing failed.
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book>;
/// A hint to `MDBook` whether this preprocessor is compatible with a
/// particular renderer.
///
/// By default, always returns `true`.
fn supports_renderer(&self, _renderer: &str) -> bool {
// Default: no restriction; the renderer name is ignored.
true
}
}

View file

@ -3,7 +3,8 @@
use crate::prelude::*; use crate::prelude::*;
use anyhow::Result; use anyhow::Result;
use mdbook::book::Book; use mdbook::book::Book;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; use mdbook::preprocess::CmdPreprocessor;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
struct Spy(Arc<Mutex<Inner>>); struct Spy(Arc<Mutex<Inner>>);