// Source: dir-odt-to-pdf/src/directory_processor.rs (Rust, 226 lines, 8.6 KiB)

use crate::error::{ProcessError, Result};
// [web-view residue] 2025-05-23 20:03:30 +01:00
use crate::tools;
use crate::{FILES_TO_CONVERT, FILES_TO_COPY, PATHS_TO_IGNORE};
use log::{debug, info, warn};
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::UNIX_EPOCH;
/// DirectoryProcessor handles the conversion of documents from a source directory
/// to a target directory, managing file conversions, copies, and cleanup.
///
/// Typical use is `DirectoryProcessor::new(source, target)` followed by `process()`.
#[derive(Debug)]
pub struct DirectoryProcessor {
/// Root of the tree that is scanned for input files.
pub(crate) source_dir: PathBuf,
/// Root of the tree that receives converted and copied files.
pub(crate) target_dir: PathBuf,
/// Paths, relative to `source_dir`, of the files found whose extension is listed in
/// `FILES_TO_CONVERT` or `FILES_TO_COPY`; filled by `get_source_files`.
pub(crate) source_files: HashSet<PathBuf>,
}
impl DirectoryProcessor {
/// Builds a processor for the given pair of directory roots.
///
/// No file system access happens here; the set of known source files starts out
/// empty and is only populated once `process` runs.
///
/// # Arguments
/// * `source` - Root directory holding the files to convert or copy
/// * `target` - Root directory that will receive the results
pub fn new(source: PathBuf, target: PathBuf) -> Self {
    let source_files = HashSet::new();
    Self {
        source_dir: source,
        target_dir: target,
        source_files,
    }
}
/// Reports whether `dest_path` is missing or older than `source_path`.
///
/// Returns `true` when the destination does not exist, or when the source's
/// modification time is strictly newer than the destination's. A modification
/// time that cannot be read is treated as the Unix epoch.
pub(crate) fn needs_copy_or_conversion(source_path: &Path, dest_path: &Path) -> bool {
    // Unreadable metadata or an unsupported mtime both collapse to the epoch.
    let mtime_or_epoch = |file: &Path| match fs::metadata(file) {
        Ok(meta) => meta.modified().unwrap_or(UNIX_EPOCH),
        Err(_) => UNIX_EPOCH,
    };

    // Short-circuit: the source is not inspected at all when the destination is absent.
    !dest_path.exists() || mtime_or_epoch(source_path) > mtime_or_epoch(dest_path)
}
/// Recursively records every file below `current_dir` that should be processed.
///
/// A file qualifies when its extension, compared in lower case, is listed in
/// `FILES_TO_CONVERT` or `FILES_TO_COPY`. Its path relative to `source_dir` is
/// inserted into `source_files` with the original spelling preserved.
///
/// # Errors
/// Returns `ProcessError::Io` when a directory or one of its entries cannot be
/// read, and `ProcessError::StripPrefix` when a path is not located below
/// `source_dir`. Failing here is deliberate: `source_files` later decides which
/// files in the target tree are deleted, so a silently skipped entry would cause
/// its counterpart in the target to be removed as obsolete.
fn get_source_files(&mut self, current_dir: &Path) -> Result<()> {
    let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?;
    for entry in entries {
        let path = entry.map_err(ProcessError::Io)?.path();
        if path.is_dir() {
            self.get_source_files(&path)?;
            continue;
        }

        // Files without a UTF-8 extension are never converted or copied.
        let ext = match path.extension().and_then(|e| e.to_str()) {
            Some(ext) => ext.to_lowercase(),
            None => continue,
        };
        let wanted =
            FILES_TO_CONVERT.contains(&ext.as_str()) || FILES_TO_COPY.contains(&ext.as_str());
        if !wanted {
            continue;
        }

        let rel_path = path
            .strip_prefix(&self.source_dir)
            .map_err(ProcessError::StripPrefix)?;
        self.source_files.insert(rel_path.to_path_buf());
    }
    Ok(())
}
/// Cleans up the target directory by removing obsolete files and empty directories.
///
/// Walks `current_dir` recursively. Files for which `should_file_exist` returns
/// `false` are deleted, and sub-directories that end up empty are removed.
/// Entries whose name contains any element of `PATHS_TO_IGNORE` are left
/// untouched and keep their parent directory alive.
///
/// # Returns
/// `Ok(true)` when nothing is left in `current_dir` after cleaning, so the caller
/// may remove it; `Ok(false)` otherwise. Anything that could not be inspected or
/// removed counts as "still there", so a directory is never reported empty while
/// something remains inside it.
///
/// # Errors
/// Returns `ProcessError::Io` when `current_dir` cannot be listed and
/// `ProcessError::StripPrefix` when a file is not located below `target_dir`.
/// Failures while removing individual files or directories are only logged.
fn clean_target_directory(&self, current_dir: &Path) -> Result<bool> {
    let entries = fs::read_dir(current_dir).map_err(ProcessError::Io)?;
    let mut is_empty = true;

    for entry in entries {
        let path = match entry {
            Ok(entry) => entry.path(),
            Err(e) => {
                // Something exists here that we could not look at: not empty.
                warn!("Could not read entry in {}: {}", current_dir.display(), e);
                is_empty = false;
                continue;
            }
        };

        // Check if path should be ignored
        let ignored = path
            .file_name()
            .and_then(|n| n.to_str())
            .map_or(false, |name| {
                PATHS_TO_IGNORE.iter().any(|&ignore| name.contains(ignore))
            });
        if ignored {
            is_empty = false;
            continue;
        }

        if path.is_dir() {
            match self.clean_target_directory(&path) {
                Ok(true) => match fs::remove_dir(&path) {
                    Ok(()) => debug!("Removed empty directory: {}", path.display()),
                    Err(e) => {
                        warn!("Could not remove empty directory {}: {}", path.display(), e);
                        // The directory is still present, so this one is not empty.
                        is_empty = false;
                    }
                },
                Ok(false) => is_empty = false,
                Err(e) => {
                    warn!("Error cleaning directory {}: {}", path.display(), e);
                    is_empty = false;
                }
            }
        } else {
            let rel_path = path
                .strip_prefix(&self.target_dir)
                .map_err(ProcessError::StripPrefix)?;
            if self.should_file_exist(rel_path) {
                is_empty = false;
            } else {
                match fs::remove_file(&path) {
                    Ok(()) => debug!("Removed obsolete file: {}", path.display()),
                    Err(e) => {
                        warn!("Could not remove file {}: {}", path.display(), e);
                        // The file is still present, so this directory is not empty.
                        is_empty = false;
                    }
                }
            }
        }
    }

    Ok(is_empty)
}
/// Determines if a file in the target directory should exist based on source files.
///
/// `rel_path` is the file's path relative to `target_dir`. The file is
/// legitimate when:
/// * the same relative path is recorded in `source_files` (a copied file), or
/// * it is a `.pdf` and `source_files` holds a file in the same directory with
///   the same stem whose extension, compared case-insensitively, is listed in
///   `FILES_TO_CONVERT` (a conversion result).
///
/// The case-insensitive comparison matters because `source_files` keeps the
/// original spelling of each path: a source named `Doc.ODT` yields `Doc.pdf`,
/// which must not be treated as obsolete.
///
/// Files without a UTF-8 extension never qualify.
fn should_file_exist(&self, rel_path: &Path) -> bool {
    let ext = match rel_path.extension().and_then(|e| e.to_str()) {
        Some(ext) => ext,
        None => return false,
    };

    // Copied verbatim from the source tree (this also covers a PDF that is
    // itself recorded as a source file).
    if self.source_files.contains(rel_path) {
        return true;
    }
    if ext != "pdf" {
        return false;
    }

    // Fast path: the source extension is spelled exactly as in FILES_TO_CONVERT.
    let exact_match = FILES_TO_CONVERT
        .iter()
        .any(|&src_ext| self.source_files.contains(&rel_path.with_extension(src_ext)));
    if exact_match {
        return true;
    }

    // Slow path: tolerate differently-cased source extensions (e.g. `.ODT`).
    let parent = rel_path.parent();
    let stem = rel_path.file_stem();
    self.source_files.iter().any(|candidate| {
        candidate.parent() == parent
            && candidate.file_stem() == stem
            && candidate
                .extension()
                .and_then(|e| e.to_str())
                .map_or(false, |e| {
                    let lowered = e.to_lowercase();
                    FILES_TO_CONVERT.contains(&lowered.as_str())
                })
    })
}
/// Processes a single directory, converting or copying files as needed.
///
/// Creates `current_target` (including missing parents), then visits every
/// entry of `current_source`: sub-directories are processed recursively with the
/// matching sub-directory of `target_dir` as their target, and files are handed
/// to `process_file`. Entries that cannot be read are skipped.
///
/// # Errors
/// Returns `ProcessError::Io` when the target directory cannot be created or the
/// source directory cannot be listed, `ProcessError::StripPrefix` when a
/// sub-directory is not located below `source_dir`, and any error coming from
/// `process_file` or a recursive call.
fn process_directory(&self, current_source: &Path, current_target: &Path) -> Result<()> {
    fs::create_dir_all(current_target).map_err(ProcessError::Io)?;
    let entries = fs::read_dir(current_source).map_err(ProcessError::Io)?;
    for entry in entries.flatten() {
        let path = entry.path();
        if path.is_dir() {
            // Mirror the sub-directory's position below the source root in the target tree.
            let relative_path = path
                .strip_prefix(&self.source_dir)
                .map_err(ProcessError::StripPrefix)?;
            let dest_subdir = self.target_dir.join(relative_path);
            self.process_directory(&path, &dest_subdir)?;
        } else {
            self.process_file(&path, current_source, current_target)?;
        }
    }
    Ok(())
}
/// Dispatches one file to conversion or copying, depending on its extension.
///
/// The extension is compared in lower case: a match in `FILES_TO_CONVERT` leads
/// to `convert_file`, otherwise a match in `FILES_TO_COPY` leads to `copy_file`.
/// Files without a UTF-8 extension, or with an unlisted one, are left alone.
///
/// # Errors
/// Returns `ProcessError::StripPrefix` when `path` is not located below
/// `current_source`, and forwards any error from the chosen handler.
fn process_file(&self, path: &Path, current_source: &Path, current_target: &Path) -> Result<()> {
    let raw_ext = match path.extension().and_then(|e| e.to_str()) {
        Some(raw_ext) => raw_ext,
        None => return Ok(()),
    };

    // Resolved before the extension lists are consulted, so a path outside
    // `current_source` is reported even for files that would be skipped.
    let relative_path = path
        .strip_prefix(current_source)
        .map_err(ProcessError::StripPrefix)?;

    let normalized = raw_ext.to_lowercase();
    let key = normalized.as_str();
    if FILES_TO_CONVERT.contains(&key) {
        return self.convert_file(path, relative_path, current_target);
    }
    if FILES_TO_COPY.contains(&key) {
        return self.copy_file(path, relative_path, current_target);
    }
    Ok(())
}
// [web-view residue] 2025-05-23 20:03:30 +01:00
/// Converts a file to PDF format.
///
/// The expected result is `current_target/<relative_path>` with its extension
/// replaced by `pdf`. Nothing happens when that file already exists and is not
/// older than `path` (see `needs_copy_or_conversion`). Otherwise the work is
/// delegated to `tools::convert_file`, which receives the source file and the
/// directory of the expected PDF (presumably its output directory; confirm
/// against `tools`).
///
/// # Errors
/// Returns `ProcessError::Processing` wrapping the error reported by
/// `tools::convert_file`.
fn convert_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> {
    let pdf_path = current_target.join(relative_path.with_extension("pdf"));
    if !Self::needs_copy_or_conversion(path, &pdf_path) {
        return Ok(());
    }

    let output_dir = pdf_path.parent().unwrap_or(current_target);
    tools::convert_file(path, output_dir).map_err(ProcessError::Processing)?;

    // Log paths relative to the roots where possible; fall back to the full
    // path instead of panicking if a path lies outside its root.
    info!(
        "Converted: {} -> {}",
        path.strip_prefix(&self.source_dir).unwrap_or(path).display(),
        pdf_path
            .strip_prefix(&self.target_dir)
            .unwrap_or(pdf_path.as_path())
            .display()
    );
    Ok(())
}
/// Copies a file to the target directory.
///
/// The destination is `current_target/<relative_path>`. Nothing happens when it
/// already exists and is not older than `path` (see `needs_copy_or_conversion`).
///
/// # Errors
/// Returns `ProcessError::Io` when the copy fails.
fn copy_file(&self, path: &Path, relative_path: &Path, current_target: &Path) -> Result<()> {
    let dest_path = current_target.join(relative_path);
    if !Self::needs_copy_or_conversion(path, &dest_path) {
        return Ok(());
    }

    fs::copy(path, &dest_path).map_err(ProcessError::Io)?;

    // Log paths relative to the roots where possible; fall back to the full
    // path instead of panicking if a path lies outside its root.
    info!(
        "Copied file: {} -> {}",
        path.strip_prefix(&self.source_dir).unwrap_or(path).display(),
        dest_path
            .strip_prefix(&self.target_dir)
            .unwrap_or(dest_path.as_path())
            .display()
    );
    Ok(())
}
// [web-view residue] 2025-05-23 20:03:30 +01:00
/// Processes all files in the source directory, converting or copying them as needed,
/// and then cleans up the target directory.
///
/// The run has three phases:
/// 1. collect the relevant source files into `source_files`,
/// 2. convert or copy them into the target tree,
/// 3. remove files and empty directories from the target tree that no longer
///    correspond to a source file.
///
/// The method may be called repeatedly; each call rebuilds `source_files` from
/// scratch so files deleted from the source since the previous call are not
/// remembered.
///
/// # Errors
/// Returns the first `ProcessError` raised by any phase. Later phases are not
/// run after a failure, so no cleanup happens based on an incomplete scan.
pub fn process(&mut self) -> Result<()> {
    debug!("Collecting source files");
    // Drop entries from a previous run; stale entries would protect obsolete
    // target files from cleanup.
    self.source_files.clear();
    // `get_source_files` borrows `self` mutably, so the root must be an
    // independent copy for the duration of the call.
    let source_root = self.source_dir.clone();
    self.get_source_files(&source_root)?;

    debug!("Starting directory processing");
    self.process_directory(&self.source_dir, &self.target_dir)?;

    debug!("Cleaning target directory");
    self.clean_target_directory(&self.target_dir)?;

    info!("Directory processing completed successfully");
    Ok(())
}
// [web-view residue] 2025-05-23 20:03:30 +01:00
}