diff --git a/Cargo.lock b/Cargo.lock index 6abc521..ee3acd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,15 @@ dependencies = [ "libc", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anstream" version = "0.6.18" @@ -76,12 +85,29 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.9.1" @@ -123,6 +149,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim 0.8.0", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "clap" version = "4.5.38" @@ -142,7 +183,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim", + "strsim 0.11.1", ] [[package]] @@ -151,10 +192,10 @@ version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.101", ] 
[[package]] @@ -180,9 +221,11 @@ name = "dir-odt-to-pdf" version = "0.1.0" dependencies = [ "chrono", - "clap", + "clap 4.5.38", "env_logger", "log", + "serial_test", + "structopt", "tempfile", "thiserror", "which", @@ -239,6 +282,83 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getrandom" version = "0.3.3" @@ -251,12 +371,30 @@ dependencies = [ "wasi", ] +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -308,7 +446,7 @@ checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.101", ] [[package]] @@ -321,6 +459,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.172" @@ -333,6 +477,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 
+[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.27" @@ -360,6 +514,41 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "portable-atomic" version = "1.11.0" @@ -375,6 +564,30 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -399,6 +612,15 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +[[package]] +name = "redox_syscall" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "regex" version = "1.11.1" @@ -434,7 +656,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags", + "bitflags 2.9.1", "errno", "libc", "linux-raw-sys", @@ -447,6 +669,27 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +[[package]] +name = "scc" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" +dependencies = [ + "sdd", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sdd" +version = "3.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21" + [[package]] name = "serde" version = "1.0.219" @@ -464,7 +707,32 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 
2.0.101", +] + +[[package]] +name = "serial_test" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" +dependencies = [ + "futures", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -473,12 +741,68 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap 2.34.0", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.101" @@ -503,6 +827,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thiserror" version = "2.0.12" @@ -520,7 +853,7 @@ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.101", ] [[package]] @@ -529,12 +862,36 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -566,7 +923,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -588,7 +945,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -614,6 +971,28 @@ dependencies = [ "winsafe", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.61.2" @@ -635,7 +1014,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.101", ] [[package]] @@ -646,7 +1025,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.101", ] [[package]] @@ -758,5 +1137,5 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags", + "bitflags 2.9.1", ] diff --git a/Cargo.toml b/Cargo.toml index dbbd38c..b36deb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,8 @@ env_logger = "0.11.8" thiserror = "2.0.12" tempfile = "3.8" chrono = "0.4" +structopt = "0.3" + +[dev-dependencies] +serial_test = "3.2.0" +tempfile = "3.8" diff --git a/src/directory_processor.rs b/src/directory_processor.rs index 683b30e..943d8b2 100644 --- a/src/directory_processor.rs +++ b/src/directory_processor.rs @@ -1,19 +1,23 @@ use crate::error::{ProcessError, Result}; +use crate::file_type::FileType; +// use crate::processed_path::ProcessedPath; +use crate::FILES_TO_CONVERT; use crate::tools; -use crate::{FILES_TO_CONVERT, FILES_TO_COPY, PATHS_TO_IGNORE}; use log::{debug, info, warn}; -use std::collections::HashSet; -use std::fs; -use std::path::{Path, PathBuf}; -use std::time::UNIX_EPOCH; +use std::{ + collections::HashSet, + fs, + path::{Path, PathBuf}, + time::SystemTime, +}; /// DirectoryProcessor handles the conversion of documents from a source directory /// to a target directory, managing file conversions, copies, and cleanup. #[derive(Debug)] pub struct DirectoryProcessor { - pub(crate) source_dir: PathBuf, - pub(crate) target_dir: PathBuf, - pub(crate) source_files: HashSet<PathBuf>, + pub source_dir: PathBuf, + pub target_dir: PathBuf, + pub source_files: HashSet<PathBuf>, } impl DirectoryProcessor { @@ -30,36 +34,38 @@ impl DirectoryProcessor { } } - /// Determines if a file needs to be copied or converted based on modification times. - pub(crate) fn needs_copy_or_conversion(source_path: &Path, dest_path: &Path) -> bool { - if !dest_path.exists() { + /// Determines if a file needs to be processed based on modification times.
+ pub fn needs_processing(source: &Path, target: &Path) -> bool { + if !target.exists() { return true; } - let source_modified = fs::metadata(source_path) + let source_time = fs::metadata(source) .and_then(|m| m.modified()) - .unwrap_or(UNIX_EPOCH); + .unwrap_or_else(|_| SystemTime::now()); // unreadable source mtime: assume the source changed - let dest_modified = fs::metadata(dest_path) + let target_time = fs::metadata(target) .and_then(|m| m.modified()) - .unwrap_or(UNIX_EPOCH); + .unwrap_or(SystemTime::UNIX_EPOCH); // unreadable target mtime: treat the target as stale so it is regenerated - source_modified > dest_modified + source_time > target_time } /// Collects all source files that need processing. - fn get_source_files(&mut self, current_dir: &Path) -> Result<()> { - let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?; - - for entry in entries.flatten() { + fn collect_source_files(&mut self, dir: &Path) -> Result<()> { + for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { let path = entry.path(); + if path.is_dir() { - self.get_source_files(&path)?; - } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - let ext = ext.to_lowercase(); - if FILES_TO_CONVERT.contains(&ext.as_str()) || FILES_TO_COPY.contains(&ext.as_str()) { - if let Ok(rel_path) = path.strip_prefix(&self.source_dir) { - self.source_files.insert(rel_path.to_path_buf()); + self.collect_source_files(&path)?; + continue; + } + + let file_type = FileType::from_path(&path, &self.source_dir, &self.target_dir)?; + if file_type.should_process() { + if let Some(source) = file_type.source() { + if let Ok(relative) = source.strip_prefix(&self.source_dir) { + self.source_files.insert(relative.to_path_buf()); } } } @@ -67,20 +73,72 @@ impl DirectoryProcessor { Ok(()) } - /// Cleans up the target directory by removing obsolete files and empty directories. - fn clean_target_directory(&self, current_dir: &Path) -> Result<bool> { - let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?; + /// Processes all files in the source directory.
+ fn process_directory(&self, dir: &Path) -> Result<()> { + for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { + let path = entry.path(); + + if path.is_dir() { + fs::create_dir_all(self.target_dir.join(path.strip_prefix(&self.source_dir)?)) + .map_err(ProcessError::Io)?; + self.process_directory(&path)?; + continue; + } + + let file_type = FileType::from_path(&path, &self.source_dir, &self.target_dir)?; + self.process_file(file_type)?; + } + Ok(()) + } + + /// Processes a single file based on its type. + fn process_file(&self, file_type: FileType) -> Result<()> { + match file_type { + FileType::Convert { source, target } => { + if Self::needs_processing(&source, &target) { + if let Some(parent) = target.parent() { + fs::create_dir_all(parent).map_err(ProcessError::Io)?; + } + tools::convert_file(&source, target.parent().unwrap_or(&self.target_dir)) + .map_err(ProcessError::Processing)?; + + info!( + "Converted: {} -> {}", + source.strip_prefix(&self.source_dir)?.display(), + target.strip_prefix(&self.target_dir)?.display() + ); + } + } + FileType::Copy { source, target } => { + if Self::needs_processing(&source, &target) { + if let Some(parent) = target.parent() { + fs::create_dir_all(parent).map_err(ProcessError::Io)?; + } + fs::copy(&source, &target).map_err(ProcessError::Io)?; + + info!( + "Copied: {} -> {}", + source.strip_prefix(&self.source_dir)?.display(), + target.strip_prefix(&self.target_dir)?.display() + ); + } + } + _ => {} + } + Ok(()) + } + + /// Cleans up the target directory by removing obsolete files. 
+ fn clean_target_directory(&self, dir: &Path) -> Result<bool> { let mut is_empty = true; - for entry in entries.flatten() { + for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { let path = entry.path(); - - // Check if path should be ignored - if let Some(name) = path.file_name().and_then(|n| n.to_str()) { - if PATHS_TO_IGNORE.iter().any(|&ignore| name.contains(ignore)) { - is_empty = false; - continue; - } + let file_type = FileType::from_path(&path, &self.target_dir, &self.target_dir)?; + + if file_type.should_ignore() { + is_empty = false; + continue; } if path.is_dir() { @@ -101,35 +159,33 @@ impl DirectoryProcessor { is_empty = false; } } - } else { - let rel_path = path.strip_prefix(&self.target_dir).map_err(ProcessError::StripPrefix)?; - let should_exist = self.should_file_exist(rel_path); + continue; + } - if !should_exist { - if let Err(e) = fs::remove_file(&path) { - warn!("Could not remove file {}: {}", path.display(), e); - } else { - debug!("Removed obsolete file: {}", path.display()); - } + let relative = path.strip_prefix(&self.target_dir)?; + if !self.should_file_exist(relative) { + if let Err(e) = fs::remove_file(&path) { + warn!("Could not remove file {}: {}", relative.display(), e); } else { - is_empty = false; + debug!("Removed obsolete file: {}", relative.display()); } + } else { + is_empty = false; } } Ok(is_empty) } - /// Determines if a file in the target directory should exist based on source files. + /// Determines if a file in the target directory should exist.
fn should_file_exist(&self, rel_path: &Path) -> bool { if let Some(ext) = rel_path.extension().and_then(|e| e.to_str()) { if ext == "pdf" { - // For PDF files, check if any corresponding source file exists + // Check if any corresponding source file exists FILES_TO_CONVERT .iter() .any(|&ext| self.source_files.contains(&rel_path.with_extension(ext))) } else { - // For other files, check if they exist in source self.source_files.contains(rel_path) } } else { @@ -137,89 +193,18 @@ impl DirectoryProcessor { } } - /// Processes a single directory, converting or copying files as needed. - fn process_directory(&self, current_source: &Path, current_target: &Path) -> Result<()> { - fs::create_dir_all(current_target).map_err(ProcessError::Io)?; - - let entries = fs::read_dir(current_source).map_err(ProcessError::Io)?; - - for entry in entries.flatten() { - let path = entry.path(); - - if path.is_dir() { - let relative_path = path - .strip_prefix(&self.source_dir) - .map_err(ProcessError::StripPrefix)?; - let dest_subdir = self.target_dir.join(relative_path); - self.process_directory(&path, &dest_subdir)?; - } else { - self.process_file(&path, current_source, current_target)?; - } - } - Ok(()) - } - - /// Processes a single file, either converting it to PDF or copying it. - fn process_file(&self, path: &Path, current_source: &Path, current_target: &Path) -> Result<()> { - if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - let relative_path = path - .strip_prefix(current_source) - .map_err(ProcessError::StripPrefix)?; - - let ext = ext.to_lowercase(); - if FILES_TO_CONVERT.contains(&ext.as_str()) { - self.convert_file(path, relative_path, current_target)?; - } else if FILES_TO_COPY.contains(&ext.as_str()) { - self.copy_file(path, relative_path, current_target)?; - } - } - Ok(()) - } - - /// Converts a file to PDF format. 
- fn convert_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> { - let pdf_path = current_target.join(relative_path.with_extension("pdf")); - - if Self::needs_copy_or_conversion(path, &pdf_path) { - tools::convert_file(path, pdf_path.parent().unwrap_or(current_target)) - .map_err(|e| ProcessError::Processing(e))?; - - info!( - "Converted: {} -> {}", - path.strip_prefix(&self.source_dir).unwrap().display(), - pdf_path.strip_prefix(&self.target_dir).unwrap().display() - ); - } - Ok(()) - } - - /// Copies a file to the target directory. - fn copy_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> { - let dest_path = current_target.join(relative_path); - if Self::needs_copy_or_conversion(path, &dest_path) { - fs::copy(path, &dest_path).map_err(ProcessError::Io)?; - info!( - "Copied file: {} -> {}", - path.strip_prefix(&self.source_dir).unwrap().display(), - dest_path.strip_prefix(&self.target_dir).unwrap().display() - ); - } - Ok(()) - } - - /// Processes all files in the source directory, converting or copying them as needed, - /// and then cleans up the target directory. + /// Process the entire directory structure. 
pub fn process(&mut self) -> Result<()> { - debug!("Collecting source files"); - self.get_source_files(&self.source_dir.to_owned())?; - - debug!("Starting directory processing"); - self.process_directory(&self.source_dir.to_owned(), &self.target_dir.to_owned())?; - - debug!("Cleaning target directory"); - self.clean_target_directory(&self.target_dir.to_owned())?; - - info!("Directory processing completed successfully"); + // Clone paths before mutable borrow to avoid borrowing conflicts + let source_dir = self.source_dir.to_owned(); + let target_dir = self.target_dir.to_owned(); + + // Now we can use the mutable borrow for collect_source_files + self.collect_source_files(&source_dir)?; + + // And use the cloned paths for the remaining operations + self.process_directory(&source_dir)?; + self.clean_target_directory(&target_dir)?; Ok(()) } } diff --git a/src/error.rs b/src/error.rs index 716128f..3df16d6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -5,12 +5,30 @@ use thiserror::Error; pub enum ProcessError { #[error("IO error: {0}")] Io(#[from] io::Error), - + #[error("Path strip error: {0}")] StripPrefix(#[from] std::path::StripPrefixError), - + #[error("Directory processing error: {0}")] Processing(String), + + #[error("Logging error: {0}")] + Log(#[from] LogError), } -pub type Result<T> = std::result::Result<T, ProcessError>; \ No newline at end of file +#[derive(Error, Debug)] +pub enum LogError { + #[error("IO error: {0}")] + Io(#[from] io::Error), + + #[error("Logger initialization error: {0}")] + Init(String), + + #[error("Logger already initialized")] + AlreadyInitialized, + + #[error("Lock poisoned: {0}")] + LockPoisoned(String), +} + +pub type Result<T> = std::result::Result<T, ProcessError>; diff --git a/src/file_type.rs b/src/file_type.rs new file mode 100644 index 0000000..49f3bdd --- /dev/null +++ b/src/file_type.rs @@ -0,0 +1,70 @@ +use crate::error::{ProcessError, Result}; +use crate::{FILES_TO_CONVERT, FILES_TO_COPY, PATHS_TO_IGNORE}; +use std::path::{Path, PathBuf}; + +#[derive(Debug,
PartialEq, Eq, Clone)] +pub enum FileType { + Convert { source: PathBuf, target: PathBuf }, + Copy { source: PathBuf, target: PathBuf }, + Ignore, + Other, +} + +impl FileType { + /// Determines the type of a file and its processing requirements + pub fn from_path(path: &Path, source_base: &Path, target_base: &Path) -> Result<Self> { + // First check if it should be ignored + if Self::is_ignored(path) { + return Ok(FileType::Ignore); + } + + let relative = path + .strip_prefix(source_base) + .map_err(ProcessError::StripPrefix)?; + + let source = path.to_path_buf(); + let target = target_base.join(relative); + + // Then check file extension + match path + .extension() + .and_then(|e| e.to_str()) + .map(|ext| ext.to_lowercase()) + { + Some(ext) if FILES_TO_CONVERT.contains(&ext.as_str()) => Ok(FileType::Convert { + source, + target: target.with_extension("pdf"), + }), + Some(ext) if FILES_TO_COPY.contains(&ext.as_str()) => { + Ok(FileType::Copy { source, target }) + } + _ => Ok(FileType::Other), + } + } + + /// Check if a path should be ignored + fn is_ignored(path: &Path) -> bool { + path.file_name() + .and_then(|n| n.to_str()) + .map(|name| PATHS_TO_IGNORE.iter().any(|&ignore| name.contains(ignore))) + .unwrap_or(false) + } + + /// Get the source path if this is a processable file type + pub fn source(&self) -> Option<&Path> { + match self { + FileType::Convert { source, .. } | FileType::Copy { source, .. } => Some(source), + _ => None, + } + } + + /// Returns true if this file type should be processed + pub fn should_process(&self) -> bool { + matches!(self, FileType::Convert { .. } | FileType::Copy { ..
}) + } + + /// Returns true if this file type should be ignored + pub fn should_ignore(&self) -> bool { + matches!(self, FileType::Ignore) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..65fc923 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,48 @@ +pub mod directory_processor; +pub mod error; +pub mod file_type; +pub mod logging; +pub mod tools; + +// Constants for file processing +pub const FILES_TO_CONVERT: [&str; 3] = ["odt", "doc", "docx"]; +pub const FILES_TO_COPY: [&str; 9] = [ + "jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp", "avif", "txt", +]; +pub const PATHS_TO_IGNORE: [&str; 5] = [ + ".DS_Store", + ".syncthing", + ".sync-conflict-", + ".stfolder", + ".stversions", +]; + +// Re-export commonly used items +pub use crate::logging::LogConfig; +pub use log::{debug, error, info, warn}; + +/// Macro for logging with file and line information +#[macro_export] +macro_rules! log_detail { + ($level:expr, $($arg:tt)+) => { + log::log!( + $level, + "[{}:{}] {}", + file!(), + line!(), + format_args!($($arg)+) + ); + }; +} + +/// Macro for performance logging with timing information +#[macro_export] +macro_rules! 
log_timed { + ($level:expr, $desc:expr, $body:expr) => {{ + let start = std::time::Instant::now(); + let result = $body; + let duration = start.elapsed(); + log::log!($level, "{} completed in {:.2?}", $desc, duration); + result + }}; +} diff --git a/src/logging.rs b/src/logging.rs index 3029b9d..1962590 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -1,120 +1,216 @@ -use env_logger::{Builder, Target}; -use log::LevelFilter; +use crate::error::{LogError, Result}; +use chrono::Local; +use log::{LevelFilter, Log, Metadata, Record}; use std::fs::OpenOptions; use std::io::{self, Write}; use std::path::PathBuf; +use std::sync::OnceLock; +use std::sync::{Arc, Mutex}; -/// Custom writer that writes either to stderr or to a file -pub(crate) struct LogWriter { +/// Configuration for logging setup +#[derive(Debug, Clone, PartialEq)] +pub struct LogConfig { + pub log_file: Option<PathBuf>, + pub log_level: LevelFilter, + pub append_log: bool, +} + +impl Default for LogConfig { + fn default() -> Self { + Self { + log_file: None, + log_level: LevelFilter::Info, + append_log: false, + } + } +} + +/// Custom writer that can write to either a file or stderr +#[derive(Debug)] +pub struct LogWriter { file: Option<std::fs::File>, } impl LogWriter { - fn new(file: Option<std::fs::File>) -> Self { + pub fn new(file: Option<std::fs::File>) -> Self { Self { file } } + + pub fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + match &mut self.file { + Some(file) => file.write_all(buf), + None => io::stderr().write_all(buf), + } + } + + pub fn flush(&mut self) -> io::Result<()> { + match &mut self.file { + Some(file) => file.flush(), + None => io::stderr().flush(), + } + } } impl Write for LogWriter { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - match &mut self.file { - Some(file) => file.write_all(buf)?, // Write to file if specified - None => io::stderr().write_all(buf)?, // Otherwise write to stderr - } + self.write_all(buf)?; Ok(buf.len()) } fn flush(&mut self) -> io::Result<()> { - match &mut self.file { - Some(file) =>
file.flush()?, - None => io::stderr().flush()?, - } - Ok(()) + self.flush() } } -pub struct LogConfig { - pub log_file: Option, - pub log_level: String, - pub append_log: bool, +#[derive(Debug)] +struct SimpleLogger { + writer: Arc>, + level: Arc>, } -/// Initialize logging to either stderr or file (if specified) -pub fn init_logging(config: &LogConfig) -> Result<(), Box> { - let mut builder = Builder::from_default_env(); +impl SimpleLogger { + fn new(writer: Arc>, level: Arc>) -> Self { + Self { writer, level } + } - // Set log level from command line argument - let level = match config.log_level.to_lowercase().as_str() { - "error" => LevelFilter::Error, - "warn" => LevelFilter::Warn, - "info" => LevelFilter::Info, - "debug" => LevelFilter::Debug, - "trace" => LevelFilter::Trace, - _ => LevelFilter::Info, - }; - builder.filter_level(level); + fn get_level(&self) -> Result { + self.level + .lock() + .map_err(|e| LogError::LockPoisoned(e.to_string()).into()) + .map(|guard| *guard) + } - // Format with timestamps, module path, and line numbers for debug/trace - builder.format(move |buf, record| { - let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S%.3f"); - if level >= LevelFilter::Debug { - writeln!( - buf, - "{} [{}] [{}:{}] - {}", - timestamp, - record.level(), - record.module_path().unwrap_or("unknown"), - record.line().unwrap_or(0), - record.args() - ) - } else { - writeln!( - buf, - "{} [{}] - {}", - timestamp, - record.level(), - record.args() - ) + fn write_log(&self, message: &str) -> Result<()> { + self.writer + .lock() + .map_err(|e| LogError::LockPoisoned(e.to_string()).into()) + .and_then(|mut writer| { + writer + .write_all(message.as_bytes()) + .and_then(|_| writer.flush()) + .map_err(|e| LogError::Io(e).into()) + }) + } +} + +impl Log for SimpleLogger { + fn enabled(&self, metadata: &Metadata) -> bool { + self.get_level() + .map(|level| metadata.level() <= level) + .unwrap_or(false) + } + + fn log(&self, record: &Record) { + if 
!self.enabled(record.metadata()) { + return; } - }); - // Set up the writer for either file or console output + let message = format!( + "{} [{:<5}] - {}\n", + Local::now().format("%Y-%m-%d %H:%M:%S%.3f"), + record.level(), + record.args() + ); + + if let Err(e) = self.write_log(&message) { + eprintln!("Failed to write log message: {}", e); + } + } + + fn flush(&self) { + if let Err(e) = self + .writer + .lock() + .map_err(|e| LogError::LockPoisoned(e.to_string())) + .and_then(|mut w| w.flush().map_err(LogError::Io)) + { + eprintln!("Failed to flush logger: {}", e); + } + } +} + +static LOGGER: OnceLock> = OnceLock::new(); +static LEVEL: OnceLock>> = OnceLock::new(); + +/// Initialize logging with enhanced features +/// +/// # Errors +/// +/// Returns an error if: +/// - Failed to create log directory +/// - Failed to open log file +/// - Failed to write initial log header +/// - Failed to set global logger +/// - Logger is already initialized +pub fn init_logging(config: LogConfig) -> Result<()> { let log_file = if let Some(log_path) = &config.log_file { + // Create parent directory if it doesn't exist + if let Some(parent) = log_path.parent() { + std::fs::create_dir_all(parent).map_err(LogError::Io)?; + } + let file = OpenOptions::new() .create(true) .write(true) .append(config.append_log) .truncate(!config.append_log) - .open(log_path)?; + .open(log_path) + .map_err(LogError::Io)?; - // Write header to log file if not appending + // Write header for new log files if !config.append_log { writeln!( &file, "=== Log started at {} ===", - chrono::Local::now().format("%Y-%m-%d %H:%M:%S") - )?; + Local::now().format("%Y-%m-%d %H:%M:%S") + ) + .map_err(LogError::Io)?; } Some(file) } else { None }; - // Create and set the writer - let writer = LogWriter::new(log_file); - builder.target(Target::Pipe(Box::new(writer))); + let level = LEVEL + .get_or_init(|| Arc::new(Mutex::new(config.log_level))) + .clone(); - builder.init(); + { + let mut lvl = level + .lock() + 
.map_err(|e| LogError::LockPoisoned(e.to_string()))?; + *lvl = config.log_level; + } - // Log initial message with configuration info + let writer = Arc::new(Mutex::new(LogWriter::new(log_file))); + let logger = Arc::new(SimpleLogger::new(writer.clone(), level.clone())); + + // Try to set the global logger + if LOGGER.get().is_some() { + return Err(LogError::AlreadyInitialized.into()); + } + + // Set the logger and store it + log::set_boxed_logger(Box::new(SimpleLogger::new(writer, level.clone()))) + .map_err(|e| LogError::Init(e.to_string()))?; + + // Store our logger instance + if LOGGER.set(logger).is_err() { + return Err(LogError::AlreadyInitialized.into()); + } + + log::set_max_level(config.log_level); + + // Log initial configuration log::info!( "Logging initialized (level: {}, output: {})", config.log_level, - config.log_file + config + .log_file .as_ref() - .map(|p| format!("file: {}", p.display())) + .map(|p| p.display().to_string()) .unwrap_or_else(|| "console".to_string()) ); Ok(()) -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index b4dae3f..4174dde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,77 +1,75 @@ -use clap::Parser; -use std::path::PathBuf; -use log::{info, debug, error}; - -mod directory_processor; -mod tools; -mod error; -mod logging; #[cfg(test)] mod tests; -use directory_processor::DirectoryProcessor; -use logging::{LogConfig, init_logging}; +use dir_odt_to_pdf::{ + directory_processor::DirectoryProcessor, + error::Result, + info, log_detail, log_timed, + logging::{LogConfig, init_logging}, +}; +use log::LevelFilter; +use std::path::PathBuf; +use structopt::StructOpt; -pub const FILES_TO_COPY: [&str; 10] = [ - "jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp", "avif", "txt", "md", -]; -pub const FILES_TO_CONVERT: [&str; 3] = ["odt", "doc", "docx"]; -pub const PATHS_TO_IGNORE: [&str; 5] = [".DS_Store", ".syncthing", ".sync-conflict-", ".stfolder", ".stversions"]; - -#[derive(Parser, Debug)] -#[command( - 
author, - version, - about = "Convert source directory with document files (odt/doc/docx) to target path with pdf files with changes verification" +#[derive(Debug, StructOpt)] +#[structopt( + name = "dir-odt-to-pdf", + about = "Convert ODT files to PDF in a directory" )] -struct Args { - #[arg(help = "Source directory with .odt, .doc, or .docx files")] - source: PathBuf, +struct Opt { + /// Source directory containing ODT files + #[structopt(parse(from_os_str))] + source_dir: PathBuf, - #[arg(help = "Target directory for PDFs converted files")] - dest: PathBuf, + /// Target directory for PDF files + #[structopt(parse(from_os_str))] + target_dir: PathBuf, - #[arg(long, help = "Log file path (optional)")] + /// Log file path (optional) + #[structopt(parse(from_os_str), long)] log_file: Option, - #[arg( - long, - help = "Log level (error, warn, info, debug, trace)", - default_value = "info" - )] - log_level: String, + /// Log level (trace, debug, info, warn, error) + #[structopt(long, default_value = "info")] + log_level: LevelFilter, - #[arg( - long, - help = "Append to log file instead of overwriting", - default_value_t = false - )] + /// Append to existing log file instead of overwriting + #[structopt(long)] append_log: bool, } -fn main() { - let args = Args::parse(); +fn main() -> Result<()> { + let args = Opt::from_args(); + // Initialize logging let log_config = LogConfig { - log_file: args.log_file.to_owned(), - log_level: args.log_level.to_owned(), - append_log: args.append_log.to_owned(), + log_file: args.log_file, + log_level: args.log_level, + append_log: args.append_log, }; - if let Err(e) = init_logging(&log_config) { - eprintln!("Failed to initialize logging: {}", e); - std::process::exit(1); - } + // Initialize logging system + init_logging(log_config)?; - info!("Starting document conversion"); - debug!("Source directory: {}", args.source.display()); - debug!("Target directory: {}", args.dest.display()); + info!("Starting directory processing"); + 
log_detail!( + log::Level::Info, + "Source directory: {}", + args.source_dir.display() + ); + log_detail!( + log::Level::Info, + "Target directory: {}", + args.target_dir.display() + ); - let mut processor = DirectoryProcessor::new(args.source, args.dest); - if let Err(e) = processor.process() { - error!("Error processing directory: {}", e); - std::process::exit(1); - } + let mut processor = DirectoryProcessor::new(args.source_dir, args.target_dir); - info!("Document conversion completed successfully"); + // Process directory and measure time + log_timed!(log::Level::Info, "Directory processing", { + processor.process()? + }); + + info!("Processing completed successfully"); + Ok(()) }