2025-07-21 11:37:46 -07:00
|
|
|
use anyhow::{Context, Result};
|
2025-07-21 14:47:11 -07:00
|
|
|
use mdbook_core::book::{Book, BookItem, Chapter};
|
2025-07-21 13:26:57 -07:00
|
|
|
use mdbook_core::config::BuildConfig;
|
2025-09-20 17:05:33 -07:00
|
|
|
use mdbook_core::utils::{escape_html, fs};
|
2025-07-21 14:47:11 -07:00
|
|
|
use mdbook_summary::{Link, Summary, SummaryItem, parse_summary};
|
|
|
|
|
use std::path::Path;
|
2025-09-12 06:13:45 -07:00
|
|
|
use tracing::debug;
|
2022-05-05 09:33:51 +03:00
|
|
|
|
2017-11-18 19:50:47 +08:00
|
|
|
/// Load a book into memory from its `src/` directory.
|
2025-07-25 09:02:55 -07:00
|
|
|
pub(crate) fn load_book<P: AsRef<Path>>(src_dir: P, cfg: &BuildConfig) -> Result<Book> {
|
2017-11-18 19:50:47 +08:00
|
|
|
let src_dir = src_dir.as_ref();
|
|
|
|
|
let summary_md = src_dir.join("SUMMARY.md");
|
|
|
|
|
|
2025-09-20 17:05:33 -07:00
|
|
|
let summary_content = fs::read_to_string(&summary_md)?;
|
2020-12-27 15:45:11 -05:00
|
|
|
let summary = parse_summary(&summary_content)
|
2024-09-21 15:53:59 -07:00
|
|
|
.with_context(|| format!("Summary parsing failed for file={summary_md:?}"))?;
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2017-12-11 10:32:35 +11:00
|
|
|
if cfg.create_missing {
|
2021-08-24 08:45:06 +01:00
|
|
|
create_missing(src_dir, &summary).with_context(|| "Unable to create missing chapters")?;
|
2017-12-11 10:32:35 +11:00
|
|
|
}
|
|
|
|
|
|
2017-11-18 20:01:50 +08:00
|
|
|
load_book_from_disk(&summary, src_dir)
|
2017-11-18 19:50:47 +08:00
|
|
|
}
|
|
|
|
|
|
2017-12-11 10:32:35 +11:00
|
|
|
fn create_missing(src_dir: &Path, summary: &Summary) -> Result<()> {
|
|
|
|
|
let mut items: Vec<_> = summary
|
|
|
|
|
.prefix_chapters
|
|
|
|
|
.iter()
|
2020-05-18 11:18:14 -05:00
|
|
|
.chain(summary.numbered_chapters.iter())
|
2017-12-11 10:32:35 +11:00
|
|
|
.chain(summary.suffix_chapters.iter())
|
|
|
|
|
.collect();
|
|
|
|
|
|
2023-05-13 09:50:32 -07:00
|
|
|
while let Some(next) = items.pop() {
|
2017-12-11 10:32:35 +11:00
|
|
|
if let SummaryItem::Link(ref link) = *next {
|
2020-02-29 17:55:45 +01:00
|
|
|
if let Some(ref location) = link.location {
|
|
|
|
|
let filename = src_dir.join(location);
|
|
|
|
|
if !filename.exists() {
|
|
|
|
|
if let Some(parent) = filename.parent() {
|
|
|
|
|
if !parent.exists() {
|
|
|
|
|
fs::create_dir_all(parent)?;
|
|
|
|
|
}
|
2017-12-11 17:29:32 +11:00
|
|
|
}
|
2020-02-29 17:55:45 +01:00
|
|
|
debug!("Creating missing file {}", filename.display());
|
Add a new HTML rendering pipeline
This rewrites the HTML rendering pipeline to use a tree data structure,
and implements a custom HTML serializer. The intent is to make it easier
to make changes and to manipulate the output. This should make some
future changes much easier.
This is a large change, but I'll try to briefly summarize what's
changing:
- All of the HTML rendering support has been moved out of
mdbook-markdown into mdbook-html. For now, all of the API surface is
private, though we may consider ways to safely expose it in the
future.
- Instead of using pulldown-cmark's html serializer, this takes the
pulldown-cmark events and translates them into a tree data structure
(using the ego-tree crate to define the tree). See `tree.rs`.
- HTML in the markdown document is parsed using html5ever, and then
lives inside the same tree data structure. See `tokenizer.rs`.
- Transformations are then applied to the tree data structure. For
example, adding header links or hiding code lines.
- Serialization is a simple process of writing out the nodes to a
string. See `serialize.rs`.
- The search indexer works on the tree structure instead of re-rendering
every chapter twice. See `html_handlebars/search.rs`.
- The print page now takes a very different approach of taking the
same tree structure built for rendering the chapters, and applies
transformations to it. This avoid re-parsing everything again. See
`print.rs`.
- I changed the linking behavior so that links on the print page
link to items on the print page instead of outside the print page.
- There are a variety of small changes to how it serializes as can be
seen in the changes to the tests. Some highlights:
- Code blocks no longer have a second layer of `<pre>` tags wrapping
it.
- Fixed a minor issue where a rust code block with a specific
edition was having the wrong classes when there was a default
edition.
- Drops the ammonia dependency, which significantly reduces the number
of dependencies. It was only being used for a very minor task, and
we can handle it much more easily now.
- Drops `pretty_assertions`, they are no longer used (mostly being
migrated to the testsuite).
There's obviously a lot of risk trying to parse everything to such a low
level, but I think the benefits are worth it. Also, the API isn't super
ergonomic compared to say javascript (there are no selectors), but it
works well enough so far.
I have not run this through rigorous benchmarking, but it does have a
very noticeable performance improvement, especially in a debug build.
I expect in the future that we'll want to expose some kind of
integration with extensions so they have access to this tree structure
(or some kind of tree structure).
Closes https://github.com/rust-lang/mdBook/issues/1736
2025-09-16 20:14:00 -07:00
|
|
|
let title = escape_html(&link.name);
|
2025-09-20 17:05:33 -07:00
|
|
|
fs::write(&filename, format!("# {title}\n"))?;
|
2020-02-29 17:55:45 +01:00
|
|
|
}
|
2017-12-11 10:32:35 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
items.extend(&link.nested_items);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-18 19:50:47 +08:00
|
|
|
/// Use the provided `Summary` to load a `Book` from disk.
|
|
|
|
|
///
|
|
|
|
|
/// You need to pass in the book's source directory because all the links in
|
|
|
|
|
/// `SUMMARY.md` give the chapter locations relative to it.
|
2019-03-04 11:44:00 -08:00
|
|
|
pub(crate) fn load_book_from_disk<P: AsRef<Path>>(summary: &Summary, src_dir: P) -> Result<Book> {
|
2018-01-23 01:28:37 +08:00
|
|
|
debug!("Loading the book from disk");
|
2017-11-18 19:50:47 +08:00
|
|
|
let src_dir = src_dir.as_ref();
|
|
|
|
|
|
2020-05-18 11:18:14 -05:00
|
|
|
let prefix = summary.prefix_chapters.iter();
|
|
|
|
|
let numbered = summary.numbered_chapters.iter();
|
|
|
|
|
let suffix = summary.suffix_chapters.iter();
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2020-05-18 11:18:14 -05:00
|
|
|
let summary_items = prefix.chain(numbered).chain(suffix);
|
2020-03-20 21:18:07 -05:00
|
|
|
|
2020-05-18 11:18:14 -05:00
|
|
|
let mut chapters = Vec::new();
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2020-05-18 11:18:14 -05:00
|
|
|
for summary_item in summary_items {
|
|
|
|
|
let chapter = load_summary_item(summary_item, src_dir, Vec::new())?;
|
|
|
|
|
chapters.push(chapter);
|
2017-11-18 19:50:47 +08:00
|
|
|
}
|
|
|
|
|
|
2025-07-21 14:47:11 -07:00
|
|
|
Ok(Book::new_with_items(chapters))
|
2017-11-18 19:50:47 +08:00
|
|
|
}
|
|
|
|
|
|
2020-02-29 17:55:45 +01:00
|
|
|
fn load_summary_item<P: AsRef<Path> + Clone>(
|
2018-03-14 23:47:17 +08:00
|
|
|
item: &SummaryItem,
|
|
|
|
|
src_dir: P,
|
|
|
|
|
parent_names: Vec<String>,
|
|
|
|
|
) -> Result<BookItem> {
|
2020-05-18 11:18:14 -05:00
|
|
|
match item {
|
2017-11-18 19:50:47 +08:00
|
|
|
SummaryItem::Separator => Ok(BookItem::Separator),
|
2025-07-21 10:30:43 -07:00
|
|
|
SummaryItem::Link(link) => load_chapter(link, src_dir, parent_names).map(BookItem::Chapter),
|
2020-05-18 11:18:14 -05:00
|
|
|
SummaryItem::PartTitle(title) => Ok(BookItem::PartTitle(title.clone())),
|
2025-08-09 16:38:22 -07:00
|
|
|
_ => panic!("SummaryItem {item:?} not covered"),
|
2017-11-18 19:50:47 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-09 22:34:28 +01:00
|
|
|
fn load_chapter<P: AsRef<Path>>(
|
2018-03-14 23:47:17 +08:00
|
|
|
link: &Link,
|
|
|
|
|
src_dir: P,
|
|
|
|
|
parent_names: Vec<String>,
|
|
|
|
|
) -> Result<Chapter> {
|
2020-03-09 22:34:28 +01:00
|
|
|
let src_dir = src_dir.as_ref();
|
|
|
|
|
|
2020-02-29 17:55:45 +01:00
|
|
|
let mut ch = if let Some(ref link_location) = link.location {
|
|
|
|
|
debug!("Loading {} ({})", link.name, link_location.display());
|
|
|
|
|
|
|
|
|
|
let location = if link_location.is_absolute() {
|
|
|
|
|
link_location.clone()
|
|
|
|
|
} else {
|
|
|
|
|
src_dir.join(link_location)
|
|
|
|
|
};
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2025-09-20 17:05:33 -07:00
|
|
|
let mut content = std::fs::read_to_string(&location)
|
|
|
|
|
.with_context(|| format!("failed to read chapter `{}`", link_location.display()))?;
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2020-07-25 13:02:44 +08:00
|
|
|
if content.as_bytes().starts_with(b"\xef\xbb\xbf") {
|
2020-09-29 18:01:06 +08:00
|
|
|
content.replace_range(..3, "");
|
2020-07-25 13:02:44 +08:00
|
|
|
}
|
|
|
|
|
|
2020-02-29 17:55:45 +01:00
|
|
|
let stripped = location
|
2023-05-13 09:44:11 -07:00
|
|
|
.strip_prefix(src_dir)
|
2020-02-29 17:55:45 +01:00
|
|
|
.expect("Chapters are always inside a book");
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2020-02-29 17:55:45 +01:00
|
|
|
Chapter::new(&link.name, content, stripped, parent_names.clone())
|
|
|
|
|
} else {
|
|
|
|
|
Chapter::new_draft(&link.name, parent_names.clone())
|
|
|
|
|
};
|
2017-11-18 19:50:47 +08:00
|
|
|
|
2020-12-31 15:18:37 -05:00
|
|
|
let mut sub_item_parents = parent_names;
|
2020-02-29 17:55:45 +01:00
|
|
|
|
2017-11-18 19:50:47 +08:00
|
|
|
ch.number = link.number.clone();
|
|
|
|
|
|
2018-03-07 07:02:06 -06:00
|
|
|
sub_item_parents.push(link.name.clone());
|
2017-12-11 10:32:35 +11:00
|
|
|
let sub_items = link
|
|
|
|
|
.nested_items
|
|
|
|
|
.iter()
|
2020-03-09 22:34:28 +01:00
|
|
|
.map(|i| load_summary_item(i, src_dir, sub_item_parents.clone()))
|
2017-12-11 10:32:35 +11:00
|
|
|
.collect::<Result<Vec<_>>>()?;
|
2017-11-18 19:50:47 +08:00
|
|
|
|
|
|
|
|
ch.sub_items = sub_items;
|
|
|
|
|
|
|
|
|
|
Ok(ch)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use mdbook_core::book::SectionNumber;
    use std::path::PathBuf;
    use tempfile::{Builder as TempFileBuilder, TempDir};

    const DUMMY_SRC: &str = "
# Dummy Chapter

this is some dummy text.

And here is some \
                     more text.
";

    /// Create an empty temporary directory to act as a book's source directory.
    fn new_book_dir() -> TempDir {
        TempFileBuilder::new().prefix("book").tempdir().unwrap()
    }

    /// The chapter expected when `chapter_1.md` holding `DUMMY_SRC` is loaded
    /// as a top-level chapter.
    fn expected_dummy_chapter() -> Chapter {
        Chapter::new(
            "Chapter 1",
            DUMMY_SRC.to_string(),
            "chapter_1.md",
            Vec::new(),
        )
    }

    /// A default summary whose only numbered chapter is `link`.
    fn summary_with_numbered(link: Link) -> Summary {
        let mut summary = Summary::default();
        summary.numbered_chapters = vec![SummaryItem::Link(link)];
        summary
    }

    /// Create a dummy `Link` in a temporary directory.
    fn dummy_link() -> (Link, TempDir) {
        let dir = new_book_dir();

        let md_path = dir.path().join("chapter_1.md");
        fs::write(&md_path, DUMMY_SRC).unwrap();

        (Link::new("Chapter 1", md_path), dir)
    }

    /// Create a nested `Link` written to a temporary directory.
    fn nested_links() -> (Link, TempDir) {
        let (mut root, dir) = dummy_link();

        let second_path = dir.path().join("second.md");
        fs::write(&second_path, "Hello World!").unwrap();

        let mut second = Link::new("Nested Chapter 1", &second_path);
        second.number = Some(SectionNumber::new([1, 2]));

        // The same nested link appears twice, with a separator in between.
        let children: [SummaryItem; 3] = [
            second.clone().into(),
            SummaryItem::Separator,
            second.into(),
        ];
        root.nested_items.extend(children);

        (root, dir)
    }

    #[test]
    fn load_a_single_chapter_from_disk() {
        let (link, dir) = dummy_link();

        let got = load_chapter(&link, dir.path(), Vec::new()).unwrap();

        assert_eq!(got, expected_dummy_chapter());
    }

    #[test]
    fn load_a_single_chapter_with_utf8_bom_from_disk() {
        let dir = new_book_dir();

        let md_path = dir.path().join("chapter_1.md");
        fs::write(&md_path, format!("\u{feff}{DUMMY_SRC}")).unwrap();
        let link = Link::new("Chapter 1", md_path);

        let got = load_chapter(&link, dir.path(), Vec::new()).unwrap();

        // The byte order mark must not end up in the chapter content.
        assert_eq!(got, expected_dummy_chapter());
    }

    #[test]
    fn cant_load_a_nonexistent_chapter() {
        let link = Link::new("Chapter 1", "/foo/bar/baz.md");

        let outcome = load_chapter(&link, "", Vec::new());

        assert!(outcome.is_err());
    }

    #[test]
    fn load_recursive_link_with_separators() {
        let (root, dir) = nested_links();

        let mut nested = Chapter::new(
            "Nested Chapter 1",
            String::from("Hello World!"),
            "second.md",
            vec![String::from("Chapter 1")],
        );
        nested.number = Some(SectionNumber::new([1, 2]));

        let mut parent = Chapter::new("Chapter 1", String::from(DUMMY_SRC), "chapter_1.md", vec![]);
        parent.sub_items = vec![
            BookItem::Chapter(nested.clone()),
            BookItem::Separator,
            BookItem::Chapter(nested),
        ];
        let should_be = BookItem::Chapter(parent);

        let got = load_summary_item(&SummaryItem::Link(root), dir.path(), Vec::new()).unwrap();

        assert_eq!(got, should_be);
    }

    #[test]
    fn load_a_book_with_a_single_chapter() {
        let (link, dir) = dummy_link();
        let summary = summary_with_numbered(link);

        let chapter = Chapter::new(
            "Chapter 1",
            String::from(DUMMY_SRC),
            PathBuf::from("chapter_1.md"),
            vec![],
        );
        let should_be = Book::new_with_items(vec![BookItem::Chapter(chapter)]);

        let got = load_book_from_disk(&summary, dir.path()).unwrap();

        assert_eq!(got, should_be);
    }

    #[test]
    fn cant_load_chapters_with_an_empty_path() {
        let (_, dir) = dummy_link();
        let summary = summary_with_numbered(Link::new("Empty", ""));

        let outcome = load_book_from_disk(&summary, dir.path());

        assert!(outcome.is_err());
    }

    #[test]
    fn cant_load_chapters_when_the_link_is_a_directory() {
        let (_, dir) = dummy_link();
        let nested_dir = dir.path().join("nested");
        fs::create_dir_all(&nested_dir).unwrap();

        let summary = summary_with_numbered(Link::new("nested", nested_dir));

        let outcome = load_book_from_disk(&summary, dir.path());

        assert!(outcome.is_err());
    }

    #[test]
    fn cant_open_summary_md() {
        let cfg = BuildConfig::default();
        let temp_dir = new_book_dir();

        // The directory is empty, so there is no SUMMARY.md to read.
        let error_message = load_book(&temp_dir, &cfg).unwrap_err().to_string();

        let expected = format!(
            r#"failed to read `{}`"#,
            temp_dir.path().join("SUMMARY.md").display()
        );
        assert_eq!(error_message, expected);
    }
}
|