// picobak/src/main.rs
//
// 281 lines
// 10 KiB
// Rust

use std::fs::{create_dir_all, copy};
use std::process::{Command};
use std::{fs::File, path::PathBuf};
use std::path::Path;
use std::fmt;
use std::sync::Mutex;
use clap::Parser;
use exif::{Tag, In, Value};
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
use indicatif::ParallelProgressIterator;
use rayon::prelude::*;
use serde::Deserialize;
// Command-line arguments of the program, parsed in `main` through `CliArgs::parse()`.
// NOTE(review): clap's derive presumably turns the `///` comments on the fields
// below into the generated help text, so rewording them changes the CLI output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct CliArgs {
/// Pictures library directory
backup_root: String,
/// Picture to backup. Alternatively, you can send a list of
/// pictures to backup via stdin.
file_path: Option<String>,
}
/// Structure used to parse the JSON output of the exiftool program.
///
/// `get_picture_exiftool_datetime` deserializes the exiftool output into a
/// `Vec` of these entries and only accepts an output holding exactly one.
#[derive(Debug, Deserialize)]
struct ExifToolEntry {
/// Raw value of the JSON `CreateDate` key. Optional so that an entry
/// without a creation date can still be represented.
#[serde(rename(deserialize = "CreateDate"))]
create_date: Option<String>
}
/// Successful outcome of backing up a single file.
enum BackupSuccess {
/// The library already holds a file with the same name that `same_files`
/// considers identical. Carries the source path.
AlreadyBackup(String),
/// The file has been copied. Carries the destination path and the source
/// of the datetime used to pick the destination directory.
Backup(String, PictureDatetimeOrigin)
}
/// Reason why a single file could not be backed up.
///
/// Every variant carries a human-readable detail string that ends up in the
/// final report printed on stderr.
#[derive(Debug)]
enum BackupFailure {
    /// A file with the same name already exists in the library but does not
    /// match the source file.
    AlreadyBackupButDifferent(String),
    /// The source file could not be opened or copied.
    CopyError(String),
    /// The file name could not be read (from stdin) or extracted from the path.
    IncorrectFilename(String),
}
/// Source of the datetime used to classify a picture in the library.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PictureDatetimeOrigin {
    /// Read from the file's EXIF metadata by the `exif` crate.
    Exif,
    /// Reported by the external `exiftool` program.
    ExifTool,
    /// Taken from the filesystem last modification time.
    FilesystemMetadata,
}
/// Process-wide lock taken by `upsert_picture_directory`; it is meant to keep
/// the worker threads from creating backup directories concurrently.
static CREATE_DIR_MUTEX: Mutex<()> = Mutex::new(());
/// User-facing rendering of a backup failure, used in the final report.
impl fmt::Display for BackupFailure {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Build the whole message first, then hand it to the formatter in one go.
        let rendered = match self {
            Self::AlreadyBackupButDifferent(detail) => {
                format!("{}: already exists in the photo library but has a different content", detail)
            }
            Self::CopyError(detail) => format!("Copy error, {}", detail),
            Self::IncorrectFilename(detail) => format!("Incorrect Filename, {}", detail),
        };
        f.write_str(&rendered)
    }
}
fn main() {
let cli = CliArgs::parse();
validate_args(&cli);
let stdin = std::io::stdin();
let filepaths = match cli.file_path {
Some(ref fp) => vec!(Ok(fp.to_string())),
None => stdin.lines()
.into_iter()
.map(|l| l.map_err(|_|BackupFailure::IncorrectFilename(String::from("Can't parse filename from stdin"))))
.collect()
};
let filepaths_len = filepaths.len() as u64;
let res: Vec<Result<BackupSuccess, BackupFailure>> = filepaths
.into_par_iter()
.progress_count(filepaths_len)
.map(|filepathres| {
let filepath = filepathres?;
backup_file(&cli, &filepath)
})
.collect();
display_backup_result(res)
}
/// Prints the backup statistics, followed by the list of failures, on stderr.
///
/// `results` holds one entry per processed file. Successful copies are
/// counted per datetime origin, duplicates are counted separately and every
/// failure is listed at the end of the report.
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
    let mut nb_copy_exif: u32 = 0;
    let mut nb_copy_exiftool: u32 = 0;
    let mut nb_copy_filesystem: u32 = 0;
    let mut nb_duplicates: u32 = 0;
    let mut failures: Vec<BackupFailure> = Vec::new();

    for result in results {
        match result {
            Ok(BackupSuccess::AlreadyBackup(_)) => nb_duplicates += 1,
            Ok(BackupSuccess::Backup(_, PictureDatetimeOrigin::Exif)) => nb_copy_exif += 1,
            Ok(BackupSuccess::Backup(_, PictureDatetimeOrigin::ExifTool)) => nb_copy_exiftool += 1,
            Ok(BackupSuccess::Backup(_, PictureDatetimeOrigin::FilesystemMetadata)) => {
                nb_copy_filesystem += 1
            }
            Err(failure) => failures.push(failure),
        }
    }

    // The total must include the three origins listed in the breakdown below.
    let nb_copied = nb_copy_exif + nb_copy_exiftool + nb_copy_filesystem;

    eprintln!("Backup Statistics:");
    eprintln!("==================");
    eprintln!("Duplicates: {}", nb_duplicates);
    eprintln!("Copied: {}", nb_copied);
    eprintln!("To classify these newly copied files, we used:");
    eprintln!(" {}: EXIF metadata", nb_copy_exif);
    eprintln!(" {}: the exiftool program", nb_copy_exiftool);
    eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
    eprintln!("Failures: {}", failures.len());
    if !failures.is_empty() {
        eprintln!();
        eprintln!("WARNING: unable to backup some files:");
        for failure in &failures {
            eprintln!("{}", failure);
        }
    }
}
/// Backup a file.
///
/// The destination is `<backup_root>/<year>/<month>/<day>/<file name>`, the
/// date being the one returned by `get_picture_datetime`.
///
/// Returns `BackupSuccess::Backup` when the file has been copied and
/// `BackupSuccess::AlreadyBackup` when the destination already exists and
/// `same_files` considers it identical to the source.
///
/// # Errors
///
/// * `BackupFailure::CopyError` when the source cannot be opened or copied.
/// * `BackupFailure::IncorrectFilename` when the path has no final component.
/// * `BackupFailure::AlreadyBackupButDifferent` when the destination exists
///   but differs from the source.
fn backup_file(cli: &CliArgs, file_path: &str) -> Result<BackupSuccess, BackupFailure> {
    let filename = Path::new(file_path);
    let file = File::open(filename).map_err(|e| {
        BackupFailure::CopyError(format!("cannot open the {} file: {}", file_path, e))
    })?;
    // Validate the file name before touching the library, so that an invalid
    // path does not leave an empty dated directory behind.
    let filename_name = filename.file_name().ok_or_else(|| {
        BackupFailure::IncorrectFilename(format!("Incorrect file name {}", filename.display()))
    })?;

    let (datetime, origin) = get_picture_datetime(file_path, &file);
    let picture_dir = find_backup_dir(&cli.backup_root, &datetime);
    upsert_picture_directory(&picture_dir);
    let target_filename = picture_dir.join(filename_name);

    // NOTE(review): the existence check and the copy are not atomic; two
    // sources mapping to the same destination could race when run in parallel.
    if !target_filename.is_file() {
        match copy(filename, &target_filename) {
            // Lossy conversion: a non-UTF-8 destination path must not abort the backup.
            Ok(_) => Ok(BackupSuccess::Backup(
                target_filename.to_string_lossy().into_owned(),
                origin,
            )),
            Err(e) => Err(BackupFailure::CopyError(format!(
                "cannot copy {} to {}: {}",
                file_path,
                target_filename.display(),
                e
            ))),
        }
    } else if same_files(filename, &target_filename) {
        Ok(BackupSuccess::AlreadyBackup(String::from(file_path)))
    } else {
        Err(BackupFailure::AlreadyBackupButDifferent(format!(
            "{} => {}",
            file_path,
            target_filename.display()
        )))
    }
}
/// Makes sure `picture_dir` exists and is a directory, creating it (and its
/// parents) when needed.
///
/// # Panics
///
/// Panics when the directory cannot be created, or when the path already
/// exists but is not a directory.
fn upsert_picture_directory(picture_dir: &PathBuf) {
    // Prevent concurrent directory creation by locking a mutex.
    // The guard has to be bound to a named variable: `let _ = ...` would drop
    // it immediately and leave the section below unprotected. A poisoned lock
    // (another thread panicked while holding it) protects no data, so it is
    // safe to keep going with the inner guard.
    let _guard = CREATE_DIR_MUTEX
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    if !picture_dir.exists() {
        create_dir_all(picture_dir).unwrap_or_else(|e| {
            panic!(
                "ERROR: cannot create the backup directory {}: {}",
                picture_dir.display(),
                e
            )
        });
    } else if !picture_dir.is_dir() {
        panic!(
            "ERROR: {} already exists and is not a directory. Can't use it to store a picture.",
            picture_dir.display()
        )
    }
}
/// Retrieves when the picture has been shot, along with the source of that
/// information.
///
/// Sources are tried in order: the EXIF metadata read by the `exif` crate,
/// then the `exiftool` program, and finally the file last modification date.
///
/// # Panics
///
/// Panics when the last-resort filesystem lookup fails (see
/// `get_file_modified_time`).
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
    // Try exif crate.
    get_picture_exif_datetime(file)
        .map(|dt| (dt, PictureDatetimeOrigin::Exif))
        // Exif failed, shell out to exiftool.
        .or_else(|| {
            get_picture_exiftool_datetime(file_path).map(|dt| (dt, PictureDatetimeOrigin::ExifTool))
        })
        // Exiftool failed as well. Fallback to Unix datetime. The closure keeps
        // this lookup (which may panic) from running when a previous source
        // already answered.
        .unwrap_or_else(|| {
            (
                get_file_modified_time(file_path, file),
                PictureDatetimeOrigin::FilesystemMetadata,
            )
        })
}
/// Retrieves the picture EXIF datetime.
///
/// Reads the `DateTimeOriginal` tag of the primary image and parses it with
/// the `%Y:%m:%d %H:%M:%S` format; the result is tagged as UTC as-is.
///
/// Returns `None` when the container cannot be read, the tag is missing or
/// empty, its value is not valid UTF-8, or it does not match the format.
fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
    let mut bufreader = std::io::BufReader::new(file);
    let exif = exif::Reader::new()
        .read_from_container(&mut bufreader)
        .ok()?;
    let datetime_field = exif.get_field(Tag::DateTimeOriginal, In::PRIMARY)?;
    let raw_date = match &datetime_field.value {
        Value::Ascii(components) => components.first()?,
        _ => return None,
    };
    // A malformed tag must not abort the backup: give up on EXIF and let the
    // caller fall back to the next datetime source.
    let str_date = std::str::from_utf8(raw_date).ok()?;
    NaiveDateTime::parse_from_str(str_date, "%Y:%m:%d %H:%M:%S")
        .map(|naive_datetime| DateTime::from_utc(naive_datetime, Utc))
        .ok()
}
/// Asks the exiftool CLI for the creation date of `file_path`. exiftool
/// understands far more than EXIF, MOV metadata for instance.
///
/// Returns `None` when exiftool cannot be run or fails, when its JSON output
/// cannot be decoded into exactly one entry, or when that entry has no
/// `CreateDate` matching the `%Y:%m:%d %H:%M:%S` format.
fn get_picture_exiftool_datetime(file_path: &str) -> Option<DateTime<Utc>> {
    let run = Command::new("exiftool")
        .args(["-j", "-P", "-CreateDate", file_path])
        .output()
        .ok()
        .filter(|finished| finished.status.success())?;
    let json = String::from_utf8(run.stdout).ok()?;
    let entries: Vec<ExifToolEntry> = serde_json::from_str(&json).ok()?;
    match entries.as_slice() {
        // Exactly one entry is expected since a single file was queried.
        [only] => {
            let raw_date = only.create_date.as_deref()?;
            NaiveDateTime::parse_from_str(raw_date, "%Y:%m:%d %H:%M:%S")
                .ok()
                .map(|naive| DateTime::from_utc(naive, Utc))
        }
        _ => None,
    }
}
/// If we cannot load the EXIF creation datetime, we end up using the
/// last modified time of the file.
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
let systemtime = file.metadata()
.unwrap_or_else(|_| panic!("Cannot retrieve UNIX file metadata for {}", file_path))
.modified()
.unwrap_or_else(|_| panic!("Cannot retrieve modified time for {}", file_path));
systemtime.into()
}
/// Return directory in which we want to save the picture.
fn find_backup_dir(backup_root: &str, datetime: &DateTime<Utc>) -> PathBuf {
let backup_root = Path::new(backup_root);
backup_root
.join(format!("{:04}", datetime.year()))
.join(format!("{:02}", datetime.month()))
.join(format!("{:02}", datetime.day()))
}
/// Sanity function making sure the user did not give us complete
/// garbage data.
///
/// Also warns on stderr when the `exiftool` program cannot be found, since
/// it is only an optional, additional metadata source.
///
/// # Panics
///
/// Panics when `file_path` is given but is not a file, or when `backup_root`
/// points to a regular file.
fn validate_args(args: &CliArgs) {
    if let Some(file_path) = &args.file_path {
        if !Path::new(file_path).is_file() {
            panic!("ERROR: {} is not a file", file_path);
        }
    }
    if Path::new(&args.backup_root).is_file() {
        panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
    }
    // `command -v` only looks the program up; plain `command exiftool` would
    // actually execute it. Failing to spawn the shell is treated as "not
    // found" rather than a fatal error, since exiftool is optional.
    let exif_tool_in_path = Command::new("bash")
        .args(["-c", "command -v exiftool"])
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false);
    if !exif_tool_in_path {
        eprintln!("Exiftool doesn't seem to be present in $PATH. Install it if you want to be able to extract more pictures metadata");
    }
}
/// Cheap check telling whether two files are "the same".
///
/// Reading and comparing the whole content would be too expensive, so the
/// only thing compared here is the byte length reported by the metadata of
/// each file: two files of equal size are considered identical.
///
/// # Panics
///
/// Panics when either file cannot be opened or its metadata cannot be read.
fn same_files(source: &Path, target: &Path) -> bool {
    // Size in bytes of the file at `path`, as reported by its metadata.
    let byte_len = |path: &Path| -> u64 {
        let handle = match File::open(path) {
            Ok(handle) => handle,
            Err(_) => panic!("Error: cannot open file {}", path.display()),
        };
        match handle.metadata() {
            Ok(metadata) => metadata.len(),
            Err(_) => panic!("Error: cannot get metadata of file {}", path.display()),
        }
    };
    let source_len = byte_len(source);
    let target_len = byte_len(target);
    source_len == target_len
}