Migrate book_creates_reasonable_search_index to BookTest

2025-04-22 09:00:47 -07:00 · 2025-04-22 09:00:47 -07:00 · cad8988f8d
commit cad8988f8d
parent 3fce1151dd
12 changed files with 167 additions and 61 deletions
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@ -393,67 +393,6 @@ mod search {
        serde_json::from_str(&index.replace("\\'", "'").replace("\\\\", "\\")).unwrap()
    }

-    #[test]
-    fn book_creates_reasonable_search_index() {
-        let temp = DummyBook::new().build().unwrap();
-        let md = MDBook::load(temp.path()).unwrap();
-        md.build().unwrap();
-
-        let index = read_book_index(temp.path());
-
-        let doc_urls = index["doc_urls"].as_array().unwrap();
-        eprintln!("doc_urls={doc_urls:#?}",);
-        let get_doc_ref =
-            |url: &str| -> String { doc_urls.iter().position(|s| s == url).unwrap().to_string() };
-
-        let first_chapter = get_doc_ref("first/index.html#first-chapter");
-        let introduction = get_doc_ref("intro.html#introduction");
-        let some_section = get_doc_ref("first/index.html#some-section");
-        let summary = get_doc_ref("first/includes.html#summary");
-        let no_headers = get_doc_ref("first/no-headers.html");
-        let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
-        let conclusion = get_doc_ref("conclusion.html#conclusion");
-        let heading_attrs = get_doc_ref("first/heading-attributes.html#both");
-
-        let bodyidx = &index["index"]["index"]["body"]["root"];
-        let textidx = &bodyidx["t"]["e"]["x"]["t"];
-        assert_eq!(textidx["df"], 5);
-        assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
-        assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
-
-        let docs = &index["index"]["documentStore"]["docs"];
-        assert_eq!(docs[&first_chapter]["body"], "more text.");
-        assert_eq!(docs[&some_section]["body"], "");
-        assert_eq!(
-            docs[&summary]["body"],
-            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Heading Attributes Second Chapter Nested Chapter Conclusion"
-        );
-        assert_eq!(
-            docs[&summary]["breadcrumbs"],
-            "First Chapter » Includes » Summary"
-        );
-        // See note about InlineHtml in search.rs. Ideally the `alert()` part
-        // should not be in the index, but we don't have a way to scrub inline
-        // html.
-        assert_eq!(docs[&conclusion]["body"], "I put &lt;HTML&gt; in here! Sneaky inline event alert(\"inline\");. But regular inline is indexed.");
-        assert_eq!(
-            docs[&no_headers]["breadcrumbs"],
-            "First Chapter » No Headers"
-        );
-        assert_eq!(
-            docs[&duplicate_headers_1]["breadcrumbs"],
-            "First Chapter » Duplicate Headers » Header Text"
-        );
-        assert_eq!(
-            docs[&no_headers]["body"],
-            "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex."
-        );
-        assert_eq!(
-            docs[&heading_attrs]["breadcrumbs"],
-            "First Chapter » Heading Attributes » Heading with id and classes"
-        );
-    }
-
    #[test]
    fn can_disable_individual_chapters() {
        let temp = DummyBook::new().build().unwrap();
--- a/tests/testsuite/main.rs
+++ b/tests/testsuite/main.rs
@ -15,6 +15,8 @@ mod print;
 mod redirects;
 mod renderer;
 mod rendering;
+#[cfg(feature = "search")]
+mod search;

 mod prelude {
    pub use crate::book_test::BookTest;
--- a/tests/testsuite/search.rs
+++ b/tests/testsuite/search.rs
@ -0,0 +1,78 @@
+//! Tests for search support.
+
+use crate::prelude::*;
+use std::path::Path;
+
+fn read_book_index(root: &Path) -> serde_json::Value {
+    let index = root.join("book/searchindex.js");
+    let index = std::fs::read_to_string(index).unwrap();
+    let index = index.trim_start_matches("window.search = JSON.parse('");
+    let index = index.trim_end_matches("');");
+    // We need to unescape the string as it's supposed to be an escaped JS string.
+    serde_json::from_str(&index.replace("\\'", "'").replace("\\\\", "\\")).unwrap()
+}
+
+// Some spot checks for the generation of the search index.
+#[test]
+fn reasonable_search_index() {
+    let mut test = BookTest::from_dir("search/reasonable_search_index");
+    test.build();
+    let index = read_book_index(&test.dir);
+
+    let doc_urls = index["doc_urls"].as_array().unwrap();
+    eprintln!("doc_urls={doc_urls:#?}",);
+    let get_doc_ref = |url: &str| -> String {
+        doc_urls
+            .iter()
+            .position(|s| s == url)
+            .unwrap_or_else(|| panic!("failed to find {url}"))
+            .to_string()
+    };
+
+    let first_chapter = get_doc_ref("first/index.html#first-chapter");
+    let introduction = get_doc_ref("intro.html#introduction");
+    let some_section = get_doc_ref("first/index.html#some-section");
+    let summary = get_doc_ref("first/includes.html#summary");
+    let no_headers = get_doc_ref("first/no-headers.html");
+    let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
+    let heading_attrs = get_doc_ref("first/heading-attributes.html#both");
+    let sneaky = get_doc_ref("intro.html#sneaky");
+
+    let bodyidx = &index["index"]["index"]["body"]["root"];
+    let textidx = &bodyidx["t"]["e"]["x"]["t"];
+    assert_eq!(textidx["df"], 5);
+    assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
+    assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
+
+    let docs = &index["index"]["documentStore"]["docs"];
+    assert_eq!(docs[&first_chapter]["body"], "more text.");
+    assert_eq!(docs[&some_section]["body"], "");
+    assert_eq!(
+        docs[&summary]["body"],
+        "Introduction First Chapter Includes Unicode No Headers Duplicate Headers Heading Attributes"
+    );
+    assert_eq!(
+        docs[&summary]["breadcrumbs"],
+        "First Chapter » Includes » Summary"
+    );
+    // See the note about InlineHtml in the search index generator's search.rs
+    // (not this test file). Ideally the `alert()` part should not be in the
+    // index, but we don't have a way to scrub inline html.
+    assert_eq!(docs[&sneaky]["body"], "I put &lt;HTML&gt; in here! Sneaky inline event alert(\"inline\");. But regular inline is indexed.");
+    assert_eq!(
+        docs[&no_headers]["breadcrumbs"],
+        "First Chapter » No Headers"
+    );
+    assert_eq!(
+        docs[&duplicate_headers_1]["breadcrumbs"],
+        "First Chapter » Duplicate Headers » Header Text"
+    );
+    assert_eq!(
+        docs[&no_headers]["body"],
+        "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex."
+    );
+    assert_eq!(
+        docs[&heading_attrs]["breadcrumbs"],
+        "First Chapter » Heading Attributes » Heading with id and classes"
+    );
+}
--- a/tests/testsuite/search/reasonable_search_index/expected_index.js
+++ b/tests/testsuite/search/reasonable_search_index/expected_index.js
--- a/tests/testsuite/search/reasonable_search_index/src/SUMMARY.md
+++ b/tests/testsuite/search/reasonable_search_index/src/SUMMARY.md
@ -0,0 +1,10 @@
+# Summary
+
+[Introduction](intro.md)
+
+- [First Chapter](first/index.md)
+    - [Includes](first/includes.md)
+    - [Unicode](first/unicode.md)
+    - [No Headers](first/no-headers.md)
+    - [Duplicate Headers](first/duplicate-headers.md)
+    - [Heading Attributes](first/heading-attributes.md)
--- a/tests/testsuite/search/reasonable_search_index/src/first/duplicate-headers.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/duplicate-headers.md
@ -0,0 +1,9 @@
+# Duplicate headers
+
+This page validates behaviour of duplicate headers.
+
+# Header Text
+
+# Header Text
+
+# header-text
--- a/tests/testsuite/search/reasonable_search_index/src/first/heading-attributes.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/heading-attributes.md
@ -0,0 +1,5 @@
+# Heading Attributes {#attrs}
+
+## Heading with classes {.class1 .class2}
+
+## Heading with id and classes {#both .class1 .class2}
--- a/tests/testsuite/search/reasonable_search_index/src/first/includes.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/includes.md
@ -0,0 +1,3 @@
+# Includes
+
+{{#include ../SUMMARY.md::}}
--- a/tests/testsuite/search/reasonable_search_index/src/first/index.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/index.md
@ -0,0 +1,5 @@
+# First Chapter
+
+more text.
+
+## Some Section
--- a/tests/testsuite/search/reasonable_search_index/src/first/no-headers.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/no-headers.md
@ -0,0 +1,5 @@
+Capybara capybara capybara.
+
+Capybara capybara capybara.
+
+ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.
--- a/tests/testsuite/search/reasonable_search_index/src/first/unicode.md
+++ b/tests/testsuite/search/reasonable_search_index/src/first/unicode.md
@ -0,0 +1,21 @@
+# Unicode stress tests
+
+Please be careful editing, this contains carefully crafted characters.
+
+Two byte character: spatiëring
+
+Combining character: spatiëring
+
+Three byte character: 书こんにちは
+
+Four byte character: 𐌀‮𐌁‮𐌂‮𐌃‮𐌄‮𐌅‮𐌆‮𐌇‮𐌈‬
+
+Right-to-left: مرحبا
+
+Emoticons: 🔊 😍 💜 1️⃣
+
+right-to-left mark: hello באמת!‏
+
+
+Zalgo: ǫ̛̖̱̗̝͈̋͒͋̏ͥͫ̒̆ͩ̏͌̾͊͐ͪ̾̚
+
--- a/tests/testsuite/search/reasonable_search_index/src/intro.md
+++ b/tests/testsuite/search/reasonable_search_index/src/intro.md
@ -0,0 +1,28 @@
+# Introduction
+
+Here's some interesting text...
+
+## Sneaky
+
+<p>
+<!--secret secret-->
+I put &lt;HTML&gt; in here!<br/>
+</p>
+<script type="text/javascript" >
+// I probably shouldn't do this
+if (3 < 5 > 10)
+{
+    alert("The sky is falling!");
+}
+</script >
+<style >
+/*
+css looks, like this {
+    foo: < 3 <bar >
+}
+*/
+</style>
+
+Sneaky inline event <script>alert("inline");</script>.
+
+But regular <b>inline</b> is indexed.