mdbook/src/utils/mod.rs
Matt Ickstadt b2ad669c61 Search with Elasticlunr, updated (#604)
* Add search with elasticlunr.js

This commit adds search functionality to mdBook, based on work done by @phaiax. The in-browser search code uses elasticlunr.js to execute the search, using an index generated at book build time by elasticlunr-rs.

* Add generator comment
Someone on Reddit was wondering how the rust book was generated and said they checked the source. Thought I'd put this here. Might be a good idea to have a little footer "made with mdBook", but this'll do for now.

* Remove search/editor file override behavior

* Use for loop for book iterator

* Improve HTML regex

* Fix search CORS in file URIs

* Use ammonia to sanitize HTML

* Filter html5ever log messages
2018-03-07 21:02:06 +08:00

316 lines
8.7 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#![allow(missing_docs)] // FIXME: Document this
pub mod fs;
mod string;
use errors::Error;
use regex::Regex;
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
OPTION_ENABLE_TABLES};
use std::borrow::Cow;
pub use self::string::{RangeArgument, take_lines};
/// Replaces multiple consecutive whitespace characters with a single space character.
pub fn collapse_whitespace<'a>(text: &'a str) -> Cow<'a, str> {
lazy_static! {
static ref RE: Regex = Regex::new(r"\s\s+").unwrap();
}
RE.replace_all(text, " ")
}
/// Convert the given string to a valid HTML element ID
pub fn normalize_id(content: &str) -> String {
let mut ret = content
.chars()
.filter_map(|ch| {
if ch.is_alphanumeric() || ch == '_' || ch == '-' {
Some(ch.to_ascii_lowercase())
} else if ch.is_whitespace() {
Some('-')
} else {
None
}
})
.collect::<String>();
// Ensure that the first character is [A-Za-z]
if ret.chars().next().map_or(false, |c| !c.is_ascii_alphabetic()) {
ret.insert(0, 'a');
}
ret
}
/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
pub fn id_from_content(content: &str) -> String {
let mut content = content.to_string();
// Skip any tags or html-encoded stuff
const REPL_SUB: &[&str] = &["<em>",
"</em>",
"<code>",
"</code>",
"<strong>",
"</strong>",
"&lt;",
"&gt;",
"&amp;",
"&#39;",
"&quot;"];
for sub in REPL_SUB {
content = content.replace(sub, "");
}
// Remove spaces and hashes indicating a header
let trimmed = content.trim().trim_left_matches('#').trim();
normalize_id(trimmed)
}
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
let mut s = String::with_capacity(text.len() * 3 / 2);
let mut opts = Options::empty();
opts.insert(OPTION_ENABLE_TABLES);
opts.insert(OPTION_ENABLE_FOOTNOTES);
let p = Parser::new_ext(text, opts);
let mut converter = EventQuoteConverter::new(curly_quotes);
let events = p.map(clean_codeblock_headers)
.map(|event| converter.convert(event));
html::push_html(&mut s, events);
s
}
struct EventQuoteConverter {
enabled: bool,
convert_text: bool,
}
impl EventQuoteConverter {
fn new(enabled: bool) -> Self {
EventQuoteConverter {
enabled: enabled,
convert_text: true,
}
}
fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
if !self.enabled {
return event;
}
match event {
Event::Start(Tag::CodeBlock(_)) | Event::Start(Tag::Code) => {
self.convert_text = false;
event
}
Event::End(Tag::CodeBlock(_)) | Event::End(Tag::Code) => {
self.convert_text = true;
event
}
Event::Text(ref text) if self.convert_text => {
Event::Text(Cow::from(convert_quotes_to_curly(text)))
}
_ => event,
}
}
}
fn clean_codeblock_headers(event: Event) -> Event {
match event {
Event::Start(Tag::CodeBlock(ref info)) => {
let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
Event::Start(Tag::CodeBlock(Cow::from(info)))
}
_ => event,
}
}
fn convert_quotes_to_curly(original_text: &str) -> String {
// We'll consider the start to be "whitespace".
let mut preceded_by_whitespace = true;
original_text.chars()
.map(|original_char| {
let converted_char = match original_char {
'\'' => {
if preceded_by_whitespace {
''
} else {
''
}
}
'"' => {
if preceded_by_whitespace {
'“'
} else {
'”'
}
}
_ => original_char,
};
preceded_by_whitespace = original_char.is_whitespace();
converted_char
})
.collect()
}
/// Prints a "backtrace" of some `Error`.
pub fn log_backtrace(e: &Error) {
error!("Error: {}", e);
for cause in e.iter().skip(1) {
error!("\tCaused By: {}", cause);
}
}
#[cfg(test)]
mod tests {
mod render_markdown {
use super::super::render_markdown;
#[test]
fn it_can_keep_quotes_straight() {
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
}
#[test]
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
let expected = r#"<p>one</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> four</p>
"#;
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn whitespace_outside_of_codeblock_header_is_preserved() {
let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;
let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let input = r#"
```rust,no_run,should_panic,property_3
```
"#;
let expected =
r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
let input = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;
let expected =
r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_without_properties_has_proper_html_class() {
let input = r#"
```rust
```
"#;
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
}
mod html_munging {
use super::super::{id_from_content, normalize_id};
#[test]
fn it_generates_anchors() {
assert_eq!(id_from_content("## `--passes`: add more rustdoc passes"),
"a--passes-add-more-rustdoc-passes");
assert_eq!(id_from_content("## Method-call expressions"),
"method-call-expressions");
}
#[test]
fn it_normalizes_ids() {
assert_eq!(normalize_id("`--passes`: add more rustdoc passes"),
"a--passes-add-more-rustdoc-passes");
assert_eq!(normalize_id("Method-call 🐙 expressions \u{1f47c}"),
"method-call--expressions-");
assert_eq!(normalize_id("_-_12345"), "a_-_12345");
assert_eq!(normalize_id("12345"), "a12345");
assert_eq!(normalize_id(""), "");
}
}
mod convert_quotes_to_curly {
use super::super::convert_quotes_to_curly;
#[test]
fn it_converts_single_quotes() {
assert_eq!(convert_quotes_to_curly("'one', 'two'"),
"one, two");
}
#[test]
fn it_converts_double_quotes() {
assert_eq!(convert_quotes_to_curly(r#""one", "two""#),
"“one”, “two”");
}
#[test]
fn it_treats_tab_as_whitespace() {
assert_eq!(convert_quotes_to_curly("\t'one'"), "\tone");
}
}
}