diff --git a/Cargo.lock b/Cargo.lock index c244e651..fc0459db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -869,6 +869,7 @@ dependencies = [ "tempfile", "tokio", "toml", + "topological-sort", "walkdir", "warp", ] @@ -1775,6 +1776,12 @@ dependencies = [ "serde", ] +[[package]] +name = "topological-sort" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa7c7f42dea4b1b99439786f5633aeb9c14c1b53f75e282803c2ec2ad545873c" + [[package]] name = "tower-service" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 045c66b9..a6ab6c86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ serde_json = "1.0" shlex = "1" tempfile = "3.0" toml = "0.5.1" +topological-sort = "0.1.0" # Watch feature notify = { version = "4.0", optional = true } diff --git a/guide/src/format/configuration/preprocessors.md b/guide/src/format/configuration/preprocessors.md index be614cac..8d3e1dc6 100644 --- a/guide/src/format/configuration/preprocessors.md +++ b/guide/src/format/configuration/preprocessors.md @@ -56,3 +56,25 @@ be overridden by adding a `command` field. [preprocessor.random] command = "python random.py" ``` + +### Require A Certain Order + +The order in which preprocessors are run can be controlled with the `before` and `after` fields. +For example, suppose you want your `linenos` preprocessor to process lines that may have been `{{#include}}`d; then you want it to run after the built-in `links` preprocessor, which you can require using either the `before` or `after` field: + +```toml +[preprocessor.linenos] +after = [ "links" ] +``` + +or + +```toml +[preprocessor.links] +before = [ "linenos" ] +``` + +It would also be possible, though redundant, to specify both of the above in the same config file. + +Preprocessors having the same priority specified through `before` and `after` are sorted by name. +Any infinite loops will be detected and produce an error. diff --git a/src/book/mod.rs b/src/book/mod.rs index 7f1159b4..df140797 100644 --- a/src/book/mod.rs +++ b/src/book/mod.rs @@ -20,6 +20,7 @@ use std::process::Command; use std::string::ToString; use tempfile::Builder as TempFileBuilder; use toml::Value; +use topological_sort::TopologicalSort; use crate::errors::*; use crate::preprocess::{ @@ -372,12 +373,7 @@ fn determine_renderers(config: &Config) -> Vec> { renderers } -fn default_preprocessors() -> Vec> { - vec![ - Box::new(LinkPreprocessor::new()), - Box::new(IndexPreprocessor::new()), - ] -} +const DEFAULT_PREPROCESSORS: &[&'static str] = &["links", "index"]; fn is_default_preprocessor(pre: &dyn Preprocessor) -> bool { let name = pre.name(); @@ -386,36 +382,127 @@ fn is_default_preprocessor(pre: &dyn Preprocessor) -> bool { /// Look at the `MDBook` and try to figure out what preprocessors to run. fn determine_preprocessors(config: &Config) -> Result>> { - let mut preprocessors = Vec::new(); + // Collect the names of all preprocessors intended to be run, and the order + // in which they should be run. + let mut preprocessor_names = TopologicalSort::::new(); if config.build.use_default_preprocessors { - preprocessors.extend(default_preprocessors()); + for name in DEFAULT_PREPROCESSORS { + preprocessor_names.insert(name.to_string()); + } } if let Some(preprocessor_table) = config.get("preprocessor").and_then(Value::as_table) { - for key in preprocessor_table.keys() { - match key.as_ref() { - "links" => preprocessors.push(Box::new(LinkPreprocessor::new())), - "index" => preprocessors.push(Box::new(IndexPreprocessor::new())), - name => preprocessors.push(interpret_custom_preprocessor( - name, - &preprocessor_table[name], - )), + for (name, table) in preprocessor_table.iter() { + preprocessor_names.insert(name.to_string()); + + let exists = |name| { + (config.build.use_default_preprocessors && DEFAULT_PREPROCESSORS.contains(&name)) + || preprocessor_table.contains_key(name) + }; + + if let Some(before) = table.get("before") { + let before = before.as_array().ok_or_else(|| { + Error::msg(format!( + "Expected preprocessor.{}.before to be an array", + name + )) + })?; + for after in before { + let after = after.as_str().ok_or_else(|| { + Error::msg(format!( + "Expected preprocessor.{}.before to contain strings", + name + )) + })?; + + if !exists(after) { + // Only warn so that preprocessors can be toggled on and off (e.g. for + // troubleshooting) without having to worry about order too much. + warn!( + "preprocessor.{}.after contains \"{}\", which was not found", + name, after + ); + } else { + preprocessor_names.add_dependency(name, after); + } + } + } + + if let Some(after) = table.get("after") { + let after = after.as_array().ok_or_else(|| { + Error::msg(format!( + "Expected preprocessor.{}.after to be an array", + name + )) + })?; + for before in after { + let before = before.as_str().ok_or_else(|| { + Error::msg(format!( + "Expected preprocessor.{}.after to contain strings", + name + )) + })?; + + if !exists(before) { + // See equivalent warning above for rationale + warn!( + "preprocessor.{}.before contains \"{}\", which was not found", + name, before + ); + } else { + preprocessor_names.add_dependency(before, name); + } + } } } } - Ok(preprocessors) + // Now that all links have been established, queue preprocessors in a suitable order + let mut preprocessors = Vec::with_capacity(preprocessor_names.len()); + // `pop_all()` returns an empty vector when no more items are not being depended upon + for mut names in std::iter::repeat_with(|| preprocessor_names.pop_all()) + .take_while(|names| !names.is_empty()) + { + // The `topological_sort` crate does not guarantee a stable order for ties, even across + // runs of the same program. Thus, we break ties manually by sorting. + // Careful: `str`'s default sorting, which we are implicitly invoking here, uses code point + // values ([1]), which may not be an alphabetical sort. + // As mentioned in [1], doing so depends on locale, which is not desirable for deciding + // preprocessor execution order. + // [1]: https://doc.rust-lang.org/stable/std/cmp/trait.Ord.html#impl-Ord-14 + names.sort(); + for name in names { + let preprocessor: Box = match name.as_str() { + "links" => Box::new(LinkPreprocessor::new()), + "index" => Box::new(IndexPreprocessor::new()), + _ => { + // The only way to request a custom preprocessor is through the `preprocessor` + // table, so it must exist, be a table, and contain the key. + let table = &config.get("preprocessor").unwrap().as_table().unwrap()[&name]; + let command = get_custom_preprocessor_cmd(&name, table); + Box::new(CmdPreprocessor::new(name, command)) + } + }; + preprocessors.push(preprocessor); + } + } + + // "If `pop_all` returns an empty vector and `len` is not 0, there are cyclic dependencies." + // Normally, `len() == 0` is equivalent to `is_empty()`, so we'll use that. + if preprocessor_names.is_empty() { + Ok(preprocessors) + } else { + Err(Error::msg("Cyclic dependency detected in preprocessors")) + } } -fn interpret_custom_preprocessor(key: &str, table: &Value) -> Box { - let command = table +fn get_custom_preprocessor_cmd(key: &str, table: &Value) -> String { + table .get("command") .and_then(Value::as_str) .map(ToString::to_string) - .unwrap_or_else(|| format!("mdbook-{}", key)); - - Box::new(CmdPreprocessor::new(key.to_string(), command)) + .unwrap_or_else(|| format!("mdbook-{}", key)) } fn interpret_custom_renderer(key: &str, table: &Value) -> Box { @@ -515,8 +602,8 @@ mod tests { assert!(got.is_ok()); assert_eq!(got.as_ref().unwrap().len(), 2); - assert_eq!(got.as_ref().unwrap()[0].name(), "links"); - assert_eq!(got.as_ref().unwrap()[1].name(), "index"); + assert_eq!(got.as_ref().unwrap()[0].name(), "index"); + assert_eq!(got.as_ref().unwrap()[1].name(), "links"); } #[test] @@ -563,9 +650,123 @@ mod tests { // make sure the `preprocessor.random` table exists let random = cfg.get_preprocessor("random").unwrap(); - let random = interpret_custom_preprocessor("random", &Value::Table(random.clone())); + let random = get_custom_preprocessor_cmd("random", &Value::Table(random.clone())); - assert_eq!(random.cmd(), "python random.py"); + assert_eq!(random, "python random.py"); + } + + #[test] + fn preprocessor_before_must_be_array() { + let cfg_str = r#" + [preprocessor.random] + before = 0 + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + assert!(determine_preprocessors(&cfg).is_err()); + } + + #[test] + fn preprocessor_after_must_be_array() { + let cfg_str = r#" + [preprocessor.random] + after = 0 + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + assert!(determine_preprocessors(&cfg).is_err()); + } + + #[test] + fn preprocessor_order_is_honored() { + let cfg_str = r#" + [preprocessor.random] + before = [ "last" ] + after = [ "index" ] + + [preprocessor.last] + after = [ "links", "index" ] + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + let preprocessors = determine_preprocessors(&cfg).unwrap(); + let index = |name| { + preprocessors + .iter() + .enumerate() + .find(|(_, preprocessor)| preprocessor.name() == name) + .unwrap() + .0 + }; + let assert_before = |before, after| { + if index(before) >= index(after) { + eprintln!("Preprocessor order:"); + for preprocessor in &preprocessors { + eprintln!(" {}", preprocessor.name()); + } + panic!("{} should come before {}", before, after); + } + }; + + assert_before("index", "random"); + assert_before("index", "last"); + assert_before("random", "last"); + assert_before("links", "last"); + } + + #[test] + fn cyclic_dependencies_are_detected() { + let cfg_str = r#" + [preprocessor.links] + before = [ "index" ] + + [preprocessor.index] + before = [ "links" ] + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + assert!(determine_preprocessors(&cfg).is_err()); + } + + #[test] + fn dependencies_dont_register_undefined_preprocessors() { + let cfg_str = r#" + [preprocessor.links] + before = [ "random" ] + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + let preprocessors = determine_preprocessors(&cfg).unwrap(); + + assert!(preprocessors + .iter() + .find(|preprocessor| preprocessor.name() == "random") + .is_none()); + } + + #[test] + fn dependencies_dont_register_builtin_preprocessors_if_disabled() { + let cfg_str = r#" + [preprocessor.random] + before = [ "links" ] + + [build] + use-default-preprocessors = false + "#; + + let cfg = Config::from_str(cfg_str).unwrap(); + + let preprocessors = determine_preprocessors(&cfg).unwrap(); + + assert!(preprocessors + .iter() + .find(|preprocessor| preprocessor.name() == "links") + .is_none()); } #[test]