use crate::error::{ProcessError, Result}; use crate::file_type::FileType; // use crate::processed_path::ProcessedPath; use crate::FILES_TO_CONVERT; use crate::tools; use log::{debug, info, warn}; use std::{ collections::HashSet, fs, path::{Path, PathBuf}, time::SystemTime, }; /// DirectoryProcessor handles the conversion of documents from a source directory /// to a target directory, managing file conversions, copies, and cleanup. #[derive(Debug)] pub struct DirectoryProcessor { pub source_dir: PathBuf, pub target_dir: PathBuf, pub source_files: HashSet, } impl DirectoryProcessor { /// Creates a new DirectoryProcessor instance. /// /// # Arguments /// * `source` - The source directory containing files to process /// * `target` - The target directory where processed files will be placed pub fn new(source: PathBuf, target: PathBuf) -> Self { Self { source_dir: source, target_dir: target, source_files: HashSet::new(), } } /// Determines if a file needs to be processed based on modification times. pub fn needs_processing(source: &Path, target: &Path) -> bool { if !target.exists() { return true; } let source_time = fs::metadata(source) .and_then(|m| m.modified()) .unwrap_or_else(|_| SystemTime::now()); let target_time = fs::metadata(target) .and_then(|m| m.modified()) .unwrap_or_else(|_| SystemTime::now()); source_time > target_time } /// Collects all source files that need processing. fn collect_source_files(&mut self, dir: &Path) -> Result<()> { for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { let path = entry.path(); if path.is_dir() { self.collect_source_files(&path)?; continue; } let file_type = FileType::from_path(&path, &self.source_dir, &self.target_dir)?; if file_type.should_process() { if let Some(source) = file_type.source() { if let Ok(relative) = source.strip_prefix(&self.source_dir) { self.source_files.insert(relative.to_path_buf()); } } } } Ok(()) } /// Processes all files in the source directory. fn process_directory(&self, dir: &Path) -> Result<()> { for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { let path = entry.path(); if path.is_dir() { fs::create_dir_all(self.target_dir.join(path.strip_prefix(&self.source_dir)?)) .map_err(ProcessError::Io)?; self.process_directory(&path)?; continue; } let file_type = FileType::from_path(&path, &self.source_dir, &self.target_dir)?; self.process_file(file_type)?; } Ok(()) } /// Processes a single file based on its type. fn process_file(&self, file_type: FileType) -> Result<()> { match file_type { FileType::Convert { source, target } => { if Self::needs_processing(&source, &target) { if let Some(parent) = target.parent() { fs::create_dir_all(parent).map_err(ProcessError::Io)?; } tools::convert_file(&source, target.parent().unwrap_or(&self.target_dir)) .map_err(ProcessError::Processing)?; info!( "Converted: {} -> {}", source.strip_prefix(&self.source_dir)?.display(), target.strip_prefix(&self.target_dir)?.display() ); } } FileType::Copy { source, target } => { if Self::needs_processing(&source, &target) { if let Some(parent) = target.parent() { fs::create_dir_all(parent).map_err(ProcessError::Io)?; } fs::copy(&source, &target).map_err(ProcessError::Io)?; info!( "Copied: {} -> {}", source.strip_prefix(&self.source_dir)?.display(), target.strip_prefix(&self.target_dir)?.display() ); } } _ => {} } Ok(()) } /// Cleans up the target directory by removing obsolete files. fn clean_target_directory(&self, dir: &Path) -> Result { let mut is_empty = true; for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() { let path = entry.path(); let file_type = FileType::from_path(&path, &self.target_dir, &self.target_dir)?; if file_type.should_ignore() { is_empty = false; continue; } if path.is_dir() { match self.clean_target_directory(&path) { Ok(subdir_empty) => { if subdir_empty { if let Err(e) = fs::remove_dir(&path) { warn!("Could not remove empty directory {}: {}", path.display(), e); } else { debug!("Removed empty directory: {}", path.display()); } } else { is_empty = false; } } Err(e) => { warn!("Error cleaning directory {}: {}", path.display(), e); is_empty = false; } } continue; } let relative = path.strip_prefix(&self.target_dir)?; if !self.should_file_exist(relative) { if let Err(e) = fs::remove_file(&path) { warn!("Could not remove file {}: {}", relative.display(), e); } else { debug!("Removed obsolete file: {}", relative.display()); } } else { is_empty = false; } } Ok(is_empty) } /// Determines if a file in the target directory should exist. fn should_file_exist(&self, rel_path: &Path) -> bool { if let Some(ext) = rel_path.extension().and_then(|e| e.to_str()) { if ext == "pdf" { // Check if any corresponding source file exists FILES_TO_CONVERT .iter() .any(|&ext| self.source_files.contains(&rel_path.with_extension(ext))) } else { self.source_files.contains(rel_path) } } else { false } } /// Process the entire directory structure. pub fn process(&mut self) -> Result<()> { // Clone paths before mutable borrow to avoid borrowing conflicts let source_dir = self.source_dir.to_owned(); let target_dir = self.target_dir.to_owned(); // Now we can use the mutable borrow for collect_source_files self.collect_source_files(&source_dir)?; // And use the cloned paths for the remaining operations self.process_directory(&source_dir)?; self.clean_target_directory(&target_dir)?; Ok(()) } }