use crate::error::{ProcessError, Result}; use crate::tools; use crate::{FILES_TO_CONVERT, FILES_TO_COPY, PATHS_TO_IGNORE}; use log::{debug, info, warn}; use std::collections::HashSet; use std::fs; use std::path::{Path, PathBuf}; use std::time::UNIX_EPOCH; /// DirectoryProcessor handles the conversion of documents from a source directory /// to a target directory, managing file conversions, copies, and cleanup. #[derive(Debug)] pub struct DirectoryProcessor { pub(crate) source_dir: PathBuf, pub(crate) target_dir: PathBuf, pub(crate) source_files: HashSet, } impl DirectoryProcessor { /// Creates a new DirectoryProcessor instance. /// /// # Arguments /// * `source` - The source directory containing files to process /// * `target` - The target directory where processed files will be placed pub fn new(source: PathBuf, target: PathBuf) -> Self { Self { source_dir: source, target_dir: target, source_files: HashSet::new(), } } /// Determines if a file needs to be copied or converted based on modification times. pub(crate) fn needs_copy_or_conversion(source_path: &Path, dest_path: &Path) -> bool { if !dest_path.exists() { return true; } let source_modified = fs::metadata(source_path) .and_then(|m| m.modified()) .unwrap_or(UNIX_EPOCH); let dest_modified = fs::metadata(dest_path) .and_then(|m| m.modified()) .unwrap_or(UNIX_EPOCH); source_modified > dest_modified } /// Collects all source files that need processing. fn get_source_files(&mut self, current_dir: &Path) -> Result<()> { let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { self.get_source_files(&path)?; } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { let ext = ext.to_lowercase(); if FILES_TO_CONVERT.contains(&ext.as_str()) || FILES_TO_COPY.contains(&ext.as_str()) { if let Ok(rel_path) = path.strip_prefix(&self.source_dir) { self.source_files.insert(rel_path.to_path_buf()); } } } } Ok(()) } /// Cleans up the target directory by removing obsolete files and empty directories. fn clean_target_directory(&self, current_dir: &Path) -> Result { let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?; let mut is_empty = true; for entry in entries.flatten() { let path = entry.path(); // Check if path should be ignored if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if PATHS_TO_IGNORE.iter().any(|&ignore| name.contains(ignore)) { is_empty = false; continue; } } if path.is_dir() { match self.clean_target_directory(&path) { Ok(subdir_empty) => { if subdir_empty { if let Err(e) = fs::remove_dir(&path) { warn!("Could not remove empty directory {}: {}", path.display(), e); } else { debug!("Removed empty directory: {}", path.display()); } } else { is_empty = false; } } Err(e) => { warn!("Error cleaning directory {}: {}", path.display(), e); is_empty = false; } } } else { let rel_path = path.strip_prefix(&self.target_dir).map_err(ProcessError::StripPrefix)?; let should_exist = self.should_file_exist(rel_path); if !should_exist { if let Err(e) = fs::remove_file(&path) { warn!("Could not remove file {}: {}", path.display(), e); } else { debug!("Removed obsolete file: {}", path.display()); } } else { is_empty = false; } } } Ok(is_empty) } /// Determines if a file in the target directory should exist based on source files. fn should_file_exist(&self, rel_path: &Path) -> bool { if let Some(ext) = rel_path.extension().and_then(|e| e.to_str()) { if ext == "pdf" { // For PDF files, check if any corresponding source file exists FILES_TO_CONVERT .iter() .any(|&ext| self.source_files.contains(&rel_path.with_extension(ext))) } else { // For other files, check if they exist in source self.source_files.contains(rel_path) } } else { false } } /// Processes a single directory, converting or copying files as needed. fn process_directory(&self, current_source: &Path, current_target: &Path) -> Result<()> { fs::create_dir_all(current_target).map_err(ProcessError::Io)?; let entries = fs::read_dir(current_source).map_err(ProcessError::Io)?; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { let relative_path = path .strip_prefix(&self.source_dir) .map_err(ProcessError::StripPrefix)?; let dest_subdir = self.target_dir.join(relative_path); self.process_directory(&path, &dest_subdir)?; } else { self.process_file(&path, current_source, current_target)?; } } Ok(()) } /// Processes a single file, either converting it to PDF or copying it. fn process_file(&self, path: &Path, current_source: &Path, current_target: &Path) -> Result<()> { if let Some(ext) = path.extension().and_then(|e| e.to_str()) { let relative_path = path .strip_prefix(current_source) .map_err(ProcessError::StripPrefix)?; let ext = ext.to_lowercase(); if FILES_TO_CONVERT.contains(&ext.as_str()) { self.convert_file(path, relative_path, current_target)?; } else if FILES_TO_COPY.contains(&ext.as_str()) { self.copy_file(path, relative_path, current_target)?; } } Ok(()) } /// Converts a file to PDF format. fn convert_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> { let pdf_path = current_target.join(relative_path.with_extension("pdf")); if Self::needs_copy_or_conversion(path, &pdf_path) { tools::convert_file(path, pdf_path.parent().unwrap_or(current_target)) .map_err(|e| ProcessError::Processing(e))?; info!( "Converted: {} -> {}", path.strip_prefix(&self.source_dir).unwrap().display(), pdf_path.strip_prefix(&self.target_dir).unwrap().display() ); } Ok(()) } /// Copies a file to the target directory. fn copy_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> { let dest_path = current_target.join(relative_path); if Self::needs_copy_or_conversion(path, &dest_path) { fs::copy(path, &dest_path).map_err(ProcessError::Io)?; info!( "Copied file: {} -> {}", path.strip_prefix(&self.source_dir).unwrap().display(), dest_path.strip_prefix(&self.target_dir).unwrap().display() ); } Ok(()) } /// Processes all files in the source directory, converting or copying them as needed, /// and then cleans up the target directory. pub fn process(&mut self) -> Result<()> { debug!("Collecting source files"); self.get_source_files(&self.source_dir.to_owned())?; debug!("Starting directory processing"); self.process_directory(&self.source_dir.to_owned(), &self.target_dir.to_owned())?; debug!("Cleaning target directory"); self.clean_target_directory(&self.target_dir.to_owned())?; info!("Directory processing completed successfully"); Ok(()) } }