dir-odt-to-pdf/src/directory_processor.rs
Jesús Pérex 98a9649bf2 refactor: improve code to more idiomatic
- Use lib.rs for library mode and used with log tests
- Improve enum for tasks on files with file_type.rs
- Fix main and directory_processing
- Add Errors
- Adjust loggint.rs to use OnceLock and atomics so logging can be controlled in parallel tests
- Add test.rs for unit tests
2025-05-27 00:58:59 +01:00

211 lines
7.4 KiB
Rust

use crate::error::{ProcessError, Result};
use crate::file_type::FileType;
// use crate::processed_path::ProcessedPath;
use crate::FILES_TO_CONVERT;
use crate::tools;
use log::{debug, info, warn};
use std::{
collections::HashSet,
fs,
path::{Path, PathBuf},
time::SystemTime,
};
/// DirectoryProcessor handles the conversion of documents from a source directory
/// to a target directory, managing file conversions, copies, and cleanup.
///
/// All fields are public; `source_files` starts empty in [`DirectoryProcessor::new`]
/// and is filled while the source tree is scanned.
#[derive(Debug)]
pub struct DirectoryProcessor {
/// Root of the directory tree that is scanned for files to process.
pub source_dir: PathBuf,
/// Root of the directory tree where processed files are placed.
pub target_dir: PathBuf,
/// Paths relative to `source_dir`, recorded by `collect_source_files` and
/// consulted by `should_file_exist` when the target tree is cleaned.
pub source_files: HashSet<PathBuf>,
}
impl DirectoryProcessor {
/// Builds a `DirectoryProcessor` for the given pair of directories.
///
/// The set of known source files starts out empty.
///
/// # Arguments
/// * `source` - The source directory containing files to process
/// * `target` - The target directory where processed files will be placed
pub fn new(source: PathBuf, target: PathBuf) -> Self {
    let source_files = HashSet::default();
    Self {
        source_files,
        target_dir: target,
        source_dir: source,
    }
}
/// Determines if a file needs to be processed based on modification times.
///
/// # Arguments
/// * `source` - The file in the source tree
/// * `target` - The file it maps to in the target tree
///
/// # Returns
/// `true` when `source` was modified strictly later than `target`, or when
/// either modification time cannot be read (which includes a missing target).
/// Equal timestamps count as up to date.
pub fn needs_processing(source: &Path, target: &Path) -> bool {
    let modified = |path: &Path| -> std::io::Result<SystemTime> {
        fs::metadata(path).and_then(|meta| meta.modified())
    };
    match (modified(source), modified(target)) {
        (Ok(source_time), Ok(target_time)) => source_time > target_time,
        // Without both timestamps the target cannot be proven current, so
        // redo the work rather than silently skipping the file.
        _ => true,
    }
}
/// Walks `dir` recursively and records every file that should be processed.
///
/// For each non-directory entry a `FileType` is built; when it reports
/// `should_process()` and exposes a source path located under
/// `self.source_dir`, that path is stored in `self.source_files` relative to
/// the source root. Directory entries that fail to be read are skipped.
///
/// # Errors
/// Returns an error if `dir` cannot be listed or if `FileType::from_path` fails.
fn collect_source_files(&mut self, dir: &Path) -> Result<()> {
    let listing = fs::read_dir(dir).map_err(ProcessError::Io)?;
    for item in listing.flatten() {
        let current = item.path();
        if current.is_dir() {
            self.collect_source_files(&current)?;
            continue;
        }

        let kind = FileType::from_path(&current, &self.source_dir, &self.target_dir)?;
        if !kind.should_process() {
            continue;
        }
        let Some(origin) = kind.source() else {
            continue;
        };
        // Paths outside the source root are simply not recorded.
        let Ok(relative) = origin.strip_prefix(&self.source_dir) else {
            continue;
        };
        self.source_files.insert(relative.to_path_buf());
    }
    Ok(())
}
/// Recursively processes every entry found below `dir`.
///
/// Sub-directories are first mirrored under `self.target_dir` (same path
/// relative to `self.source_dir`) and then descended into; every other entry
/// is classified with `FileType::from_path` and handed to `process_file`.
/// Directory entries that fail to be read are skipped.
///
/// # Errors
/// Returns an error if `dir` cannot be listed, a directory cannot be created,
/// a path is not below `self.source_dir`, or processing a file fails.
fn process_directory(&self, dir: &Path) -> Result<()> {
    let listing = fs::read_dir(dir).map_err(ProcessError::Io)?;
    for item in listing.flatten() {
        let current = item.path();
        if current.is_dir() {
            let relative = current.strip_prefix(&self.source_dir)?;
            fs::create_dir_all(self.target_dir.join(relative)).map_err(ProcessError::Io)?;
            self.process_directory(&current)?;
        } else {
            let kind = FileType::from_path(&current, &self.source_dir, &self.target_dir)?;
            self.process_file(kind)?;
        }
    }
    Ok(())
}
/// Converts or copies one file, depending on its `FileType`.
///
/// `Convert` and `Copy` entries are only handled when `needs_processing`
/// reports that the target is missing or older than the source; every other
/// case is a no-op. The target's parent directory is created before the file
/// is written.
///
/// # Errors
/// Returns an error if the parent directory cannot be created, the
/// conversion or copy fails, or (while logging) a path is not located below
/// its expected root.
fn process_file(&self, file_type: FileType) -> Result<()> {
    match file_type {
        FileType::Convert { source, target } if Self::needs_processing(&source, &target) => {
            let out_dir = target.parent();
            if let Some(dir) = out_dir {
                fs::create_dir_all(dir).map_err(ProcessError::Io)?;
            }
            // The converter receives the output directory, not the target file itself.
            tools::convert_file(&source, out_dir.unwrap_or(&self.target_dir))
                .map_err(ProcessError::Processing)?;
            info!(
                "Converted: {} -> {}",
                source.strip_prefix(&self.source_dir)?.display(),
                target.strip_prefix(&self.target_dir)?.display()
            );
        }
        FileType::Copy { source, target } if Self::needs_processing(&source, &target) => {
            if let Some(dir) = target.parent() {
                fs::create_dir_all(dir).map_err(ProcessError::Io)?;
            }
            fs::copy(&source, &target).map_err(ProcessError::Io)?;
            info!(
                "Copied: {} -> {}",
                source.strip_prefix(&self.source_dir)?.display(),
                target.strip_prefix(&self.target_dir)?.display()
            );
        }
        // Up-to-date files and all other file types need no work.
        _ => {}
    }
    Ok(())
}
/// Cleans up the target directory by removing obsolete files.
///
/// Walks `dir` recursively. Entries whose `FileType` reports
/// `should_ignore()` are left untouched, files for which `should_file_exist`
/// returns `false` are deleted, and sub-directories that end up empty are
/// removed. Failures to delete an entry or to clean a sub-directory are
/// logged as warnings and do not abort the walk.
///
/// # Returns
/// `Ok(true)` when nothing is left inside `dir` after cleaning, `Ok(false)`
/// when at least one entry remains (including entries that could not be
/// removed).
///
/// # Errors
/// Returns an error if `dir` cannot be listed, if `FileType::from_path`
/// fails, or if a file path is not located below `self.target_dir`.
fn clean_target_directory(&self, dir: &Path) -> Result<bool> {
    let mut is_empty = true;
    for entry in fs::read_dir(dir).map_err(ProcessError::Io)?.flatten() {
        let path = entry.path();
        let file_type = FileType::from_path(&path, &self.target_dir, &self.target_dir)?;
        if file_type.should_ignore() {
            is_empty = false;
            continue;
        }

        if path.is_dir() {
            match self.clean_target_directory(&path) {
                Ok(true) => match fs::remove_dir(&path) {
                    Ok(()) => debug!("Removed empty directory: {}", path.display()),
                    Err(e) => {
                        warn!("Could not remove empty directory {}: {}", path.display(), e);
                        // The directory is still there, so this level is not empty.
                        is_empty = false;
                    }
                },
                Ok(false) => is_empty = false,
                Err(e) => {
                    warn!("Error cleaning directory {}: {}", path.display(), e);
                    is_empty = false;
                }
            }
            continue;
        }

        let relative = path.strip_prefix(&self.target_dir)?;
        if self.should_file_exist(relative) {
            is_empty = false;
            continue;
        }
        match fs::remove_file(&path) {
            Ok(()) => debug!("Removed obsolete file: {}", relative.display()),
            Err(e) => {
                warn!("Could not remove file {}: {}", relative.display(), e);
                // The file is still there, so the parent must not be treated as empty.
                is_empty = false;
            }
        }
    }
    Ok(is_empty)
}
/// Determines if a file in the target directory should exist.
///
/// # Arguments
/// * `rel_path` - Path of the file relative to the target root
///
/// # Returns
/// `true` when `self.source_files` contains the same relative path, or when
/// `rel_path` is a `.pdf` and `self.source_files` contains that path with one
/// of the `FILES_TO_CONVERT` extensions instead.
fn should_file_exist(&self, rel_path: &Path) -> bool {
    // A path recorded under its own name belongs in the target whatever its
    // extension: this covers extension-less files, dotfiles and PDFs that are
    // themselves source files.
    if self.source_files.contains(rel_path) {
        return true;
    }

    // Otherwise only a PDF with a matching convertible source is legitimate.
    let is_pdf = rel_path.extension().and_then(|e| e.to_str()) == Some("pdf");
    is_pdf
        && FILES_TO_CONVERT
            .iter()
            .any(|&ext| self.source_files.contains(&rel_path.with_extension(ext)))
}
/// Process the entire directory structure.
pub fn process(&mut self) -> Result<()> {
// Clone paths before mutable borrow to avoid borrowing conflicts
let source_dir = self.source_dir.to_owned();
let target_dir = self.target_dir.to_owned();
// Now we can use the mutable borrow for collect_source_files
self.collect_source_files(&source_dir)?;
// And use the cloned paths for the remaining operations
self.process_directory(&source_dir)?;
self.clean_target_directory(&target_dir)?;
Ok(())
}
}