// dir-odt-to-pdf/src/directory_processor.rs

use crate::tools;
use crate::{FILES_TO_CONVERT, FILES_TO_COPY, PATHS_TO_IGNORE};
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::UNIX_EPOCH;
/// Mirrors a source directory tree into a target directory.
///
/// Files whose extension is listed in `FILES_TO_CONVERT` are converted to PDF,
/// files whose extension is listed in `FILES_TO_COPY` are copied as-is, and
/// target files without a matching source file are removed afterwards.
#[derive(Debug)]
pub struct DirectoryProcessor {
    /// Root of the tree that is read.
    source_dir: PathBuf,
    /// Root of the tree that is written to and cleaned.
    target_dir: PathBuf,
    /// Paths, relative to `source_dir`, of every source file with a recognised
    /// extension. Filled by `get_source_files` and consulted while cleaning.
    source_files: HashSet<PathBuf>,
}
impl DirectoryProcessor {
/// Creates a processor that reads from `source` and writes to `target`.
///
/// Nothing on disk is touched here; the set of known source files starts
/// out empty.
pub fn new(source: PathBuf, target: PathBuf) -> Self {
    let source_files = HashSet::new();
    Self {
        source_dir: source,
        target_dir: target,
        source_files,
    }
}
/// Reports whether `dest_path` has to be (re)generated from `source_path`.
///
/// Returns `true` when `dest_path` does not exist, or when the source's
/// modification time is strictly newer than the destination's. A modification
/// time that cannot be read is treated as `UNIX_EPOCH`.
fn needs_copy_or_conversion(source_path: &Path, dest_path: &Path) -> bool {
    // Modification time of `p`, or the epoch when it is unavailable.
    let modified_or_epoch = |p: &Path| {
        fs::metadata(p)
            .and_then(|meta| meta.modified())
            .unwrap_or(UNIX_EPOCH)
    };

    // The short-circuit keeps the timestamps from being read at all when the
    // destination is missing.
    !dest_path.exists() || modified_or_epoch(source_path) > modified_or_epoch(dest_path)
}
fn get_source_files(&mut self, current_dir: &Path) -> Result<(), String> {
let entries = fs::read_dir(current_dir)
.map_err(|e| format!("Error reading directory {}: {}", current_dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
self.get_source_files(&path)?;
} else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
2025-05-23 20:03:30 +01:00
if FILES_TO_CONVERT.contains(&ext.to_lowercase().as_str())
|| FILES_TO_COPY.contains(&ext.to_lowercase().as_str())
{
if let Ok(rel_path) = path.strip_prefix(&self.source_dir) {
self.source_files.insert(rel_path.to_path_buf());
}
}
}
}
Ok(())
}
fn clean_target_directory(&self, current_dir: &Path) -> Result<bool, String> {
let entries = fs::read_dir(current_dir)
.map_err(|e| format!("Error reading directory {}: {}", current_dir.display(), e))?;
let mut is_empty = true;
for entry in entries.flatten() {
let path = entry.path();
// Check if path should be ignored (both files and directories)
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if PATHS_TO_IGNORE.iter().any(|&ignore| name.contains(ignore)) {
is_empty = false;
continue;
}
}
if path.is_dir() {
// Recursively check subdirectories
let subdir_empty = self.clean_target_directory(&path)?;
2025-05-23 20:03:30 +01:00
if subdir_empty {
if let Err(e) = fs::remove_dir(&path) {
2025-05-23 20:03:30 +01:00
eprintln!(
"Warning: Could not remove empty directory {}: {}",
path.display(),
e
);
} else {
println!("Removed empty directory: {}", path.display());
}
} else {
is_empty = false;
}
} else {
2025-05-23 20:03:30 +01:00
let rel_path = path.strip_prefix(&self.target_dir).map_err(|e| {
format!("Error getting relative path for {}: {}", path.display(), e)
})?;
// Check if this file should exist based on source files
let should_exist = if let Some(ext) = rel_path.extension().and_then(|e| e.to_str()) {
if ext == "pdf" {
// For PDF files, check if any corresponding source file exists
2025-05-23 20:03:30 +01:00
FILES_TO_CONVERT
.iter()
.any(|&ext| self.source_files.contains(&rel_path.with_extension(ext)))
} else {
// For other files, check if they exist in source
self.source_files.contains(rel_path)
}
} else {
false
};
if !should_exist {
if let Err(e) = fs::remove_file(&path) {
eprintln!("Warning: Could not remove file {}: {}", path.display(), e);
} else {
println!("Removed obsolete file: {}", path.display());
}
} else {
is_empty = false;
}
}
}
Ok(is_empty)
}
2025-05-23 20:03:30 +01:00
fn process_directory(
&self,
current_source: &Path,
current_target: &Path,
) -> Result<(), String> {
// Create destination directory if it doesn't exist
if let Err(e) = fs::create_dir_all(current_target) {
return Err(format!("Error creating destination directory: {}", e));
}
2025-05-23 20:03:30 +01:00
let entries = fs::read_dir(current_source).map_err(|e| {
format!(
"Error reading source directory {}: {}",
current_source.display(),
e
)
})?;
for entry in entries.flatten() {
let path = entry.path();
2025-05-23 20:03:30 +01:00
if path.is_dir() {
// Get the relative path from source to the current subdirectory
2025-05-23 20:03:30 +01:00
let relative_path = path
.strip_prefix(&self.source_dir)
.map_err(|e| format!("Error getting relative path: {}", e))?;
2025-05-23 20:03:30 +01:00
// Create the corresponding destination subdirectory
let dest_subdir = self.target_dir.join(relative_path);
2025-05-23 20:03:30 +01:00
// Recursively process the subdirectory
self.process_directory(&path, &dest_subdir)?;
} else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
// Get the relative path from source to the current file
2025-05-23 20:03:30 +01:00
let relative_path = path
.strip_prefix(current_source)
.map_err(|e| format!("Error getting relative path: {}", e))?;
if FILES_TO_CONVERT.contains(&ext.to_lowercase().as_str()) {
// Construct the PDF path in the current target directory
2025-05-23 20:03:30 +01:00
let pdf_path = current_target.join(relative_path.with_extension("pdf"));
if Self::needs_copy_or_conversion(&path, &pdf_path) {
2025-05-23 20:03:30 +01:00
match tools::convert_file(
&path,
&pdf_path.parent().unwrap_or(current_target),
) {
Ok(_) => println!(
"Converted: {} -> {}",
path.strip_prefix(&self.source_dir).unwrap().display(),
pdf_path.strip_prefix(&self.target_dir).unwrap().display()
),
Err(e) => eprintln!(
"Error: {} - {}",
path.strip_prefix(&self.source_dir).unwrap().display(),
e
),
}
}
} else if FILES_TO_COPY.contains(&ext.to_lowercase().as_str()) {
// For files to copy directly
let dest_path = current_target.join(relative_path);
if Self::needs_copy_or_conversion(&path, &dest_path) {
match fs::copy(&path, &dest_path) {
2025-05-23 20:03:30 +01:00
Ok(_) => println!(
"Copied file: {} -> {}",
path.strip_prefix(&self.source_dir).unwrap().display(),
dest_path.strip_prefix(&self.target_dir).unwrap().display()
),
Err(e) => eprintln!(
"Error copying file: {} - {}",
path.strip_prefix(&self.source_dir).unwrap().display(),
e
),
}
}
}
}
}
Ok(())
}
pub fn process(&mut self) -> Result<(), String> {
// Process all files
self.process_directory(&self.source_dir.to_owned(), &self.target_dir.to_owned())?;
2025-05-23 20:03:30 +01:00
// Get list of source files for cleaning
self.get_source_files(&self.source_dir.to_owned())?;
// Finally clean target directory
self.clean_target_directory(&self.target_dir.to_owned())?;
2025-05-23 20:03:30 +01:00
Ok(())
}
2025-05-23 20:03:30 +01:00
}