Backup multiple files on the same invocation

Concurrently back up the files pointed to by stdin. Instead of having
to spin up a whole picobak process for each and every picture we want
to back up, we create a single process in charge of backing up the
files using all cores. This massively improves the backup speed.

We use rayon to concurrently process the pictures.
This commit is contained in:
Félix Baylac Jacqué 2023-08-25 20:59:35 +02:00
parent 156a0b0814
commit 9c767e4938
4 changed files with 365 additions and 63 deletions

258
Cargo.lock generated
View File

@ -53,7 +53,7 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -63,7 +63,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
dependencies = [
"anstyle",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -161,12 +161,80 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "console"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.45.0",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
dependencies = [
"cfg-if",
]
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "errno"
version = "0.3.2"
@ -175,7 +243,7 @@ checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
dependencies = [
"errno-dragonfly",
"libc",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -223,6 +291,29 @@ dependencies = [
"cc",
]
[[package]]
name = "indicatif"
version = "0.17.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b297dc40733f23a0e52728a58fa9489a5b7638a324932de16b41adc3ef80730"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"rayon",
"unicode-width",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "is-terminal"
version = "0.4.9"
@ -231,7 +322,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -252,6 +343,12 @@ dependencies = [
"mutate_once",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
@ -270,6 +367,15 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "mutate_once"
version = "0.1.1"
@ -285,6 +391,22 @@ dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.18.0"
@ -297,9 +419,17 @@ version = "0.1.0"
dependencies = [
"chrono",
"clap",
"indicatif",
"kamadak-exif",
"rayon",
]
[[package]]
name = "portable-atomic"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f32154ba0af3a075eefa1eda8bb414ee928f62303a54ea85b8d6638ff1a6ee9e"
[[package]]
name = "proc-macro2"
version = "1.0.66"
@ -318,6 +448,28 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "rustix"
version = "0.38.8"
@ -328,9 +480,15 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "strsim"
version = "0.10.0"
@ -365,6 +523,12 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -459,7 +623,16 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets",
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
@ -468,7 +641,22 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
"windows-targets 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
@ -477,51 +665,93 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"

View File

@ -9,7 +9,9 @@ repository = "https://git.alternativebit.fr/NinjaTrappeur/picobak"
[dependencies]
chrono = { version = "0.4.26", features = ["clock"] }
clap = { version = "4.3.24", features = ["derive"] }
indicatif = { version = "0.17.6", features = ["rayon"] }
kamadak-exif = "0.5.5"
rayon = "1.7.0"
[profile.release]
strip = true

View File

@ -37,8 +37,8 @@ Options:
-V, --version Print version
```
You can couple this tool with [GNU parallel](https://www.gnu.org/software/parallel/) to concurrently back up multiple images and fully utilize a multicore system:
For instance:
```txt
ls dir-containing-pictures | parallel -j $(nproc) picobak /my/pic-backup-root/
ls dir-containing-pictures/* | picobak /my/pic-backup-root/
```

View File

@ -1,10 +1,14 @@
use std::fs::{create_dir_all, copy};
use std::{fs::File, path::PathBuf};
use std::path::Path;
use std::fmt;
use std::sync::Mutex;
use clap::Parser;
use exif::{Tag, In, Value};
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
use indicatif::ParallelProgressIterator;
use rayon::prelude::*;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
@ -12,71 +16,135 @@ struct CliArgs {
/// Pictures library directory
backup_root: String,
/// Picture to backup
file_path: String,
/// Do not create any directory or copy any file. Only prints out the operations it would perform
#[arg(short, long)]
dry_run: bool
file_path: Option<String>,
}
/// Outcome of a file that ended up safely stored in the picture library.
enum BackupSuccess {
    /// The library already contains a file that `same_files` considers
    /// identical to the source. Carries the source file path.
    AlreadyBackup(String),
    /// The file was copied into the library. Carries the destination path
    /// and the origin of the datetime used to pick the destination directory.
    Backup(String, PictureDatetimeOrigin),
}
/// Reason why a file could not be backed up.
enum BackupFailure {
    /// A file with the same name already sits in the library but differs
    /// from the source. Carries the source file path.
    AlreadyBackupButDifferent(String),
    /// The source file could not be opened or copied. Carries either a
    /// description of the problem or the source file path.
    CopyError(String),
    /// No usable file name could be obtained (from stdin or from the path).
    /// Carries a description of the problem.
    IncorrectFilename(String),
}
/// Where the datetime attached to a picture was read from.
enum PictureDatetimeOrigin {
    /// The datetime comes from the picture's EXIF metadata.
    Exif,
    /// No EXIF datetime was available; the file's last modification time
    /// was used instead.
    FilesystemMetadata,
}
/// Process-wide lock taken by `upsert_picture_directory`; it is meant to keep
/// the parallel backup workers from creating backup directories concurrently.
static CREATE_DIR_MUTEX: Mutex<()> = Mutex::new(());
impl fmt::Display for BackupFailure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::AlreadyBackupButDifferent(s) => write!(f, "{}: already exists in the photo library but has a different content", s),
Self::CopyError(s) => write!(f, "Copy error, {}", s),
Self::IncorrectFilename(s) => write!(f, "Incorrect Filename, {}", s)
}
}
}
fn main() {
let cli = CliArgs::parse();
if !cli.dry_run {
validate_args(&cli);
}
validate_args(&cli);
let stdin = std::io::stdin();
for line in stdin.lines() {
backup_one_file(&cli, &line.unwrap());
let filepaths = match cli.file_path {
Some(ref fp) => vec!(Ok(fp.to_string())),
None => stdin.lines()
.into_iter()
.map(|l| l.map_err(|_|BackupFailure::IncorrectFilename(String::from("Can't parse filename from stdin"))))
.collect()
};
let filepaths_len = filepaths.len() as u64;
let res: Vec<Result<BackupSuccess, BackupFailure>> = filepaths
.into_par_iter()
.progress_count(filepaths_len)
.map(|filepathres| {
let filepath = filepathres?;
backup_file(&cli, &filepath)
})
.collect();
display_backup_result(res)
}
/// Prints the end-of-run backup report on stderr.
///
/// The report lists how many files succeeded and how many failed. When at
/// least one file failed, every failure is then printed on its own line
/// using its `Display` implementation.
///
/// `results` holds one entry per processed file; it is consumed.
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
    // Only the number of successes is reported, so they are counted rather
    // than collected; failures are kept because each one is printed below.
    let mut success_count: usize = 0;
    let mut failures: Vec<BackupFailure> = Vec::new();
    for result in results {
        match result {
            Ok(_) => success_count += 1,
            Err(failure) => failures.push(failure),
        }
    }

    eprintln!("Backup Statistics:");
    eprintln!();
    eprintln!("Success: {}", success_count);
    eprintln!("Failures: {}", failures.len());

    if !failures.is_empty() {
        eprintln!();
        eprintln!("WARNING: unable to backup some files:");
        for failure in &failures {
            eprintln!("{}", failure);
        }
    }
}
/// Backup a file.
fn backup_one_file(cli: &CliArgs, file_path: &str) {
fn backup_file(cli: &CliArgs, file_path: &str) -> Result<BackupSuccess, BackupFailure> {
let filename = Path::new(file_path);
let file = File::open(filename).unwrap_or_else(
|_| panic!("ERROR: cannot open the {} file", file_path)
);
let datetime = get_picture_datetime(file_path, &file);
let picture_dir = find_backup_dir(&cli.backup_root, &datetime);
let file = File::open(filename).map_err(
|e| BackupFailure::CopyError(format!("cannot open the {} file: {}", file_path, e.to_string()))
)?;
let (datetime, origin) = get_picture_datetime(file_path, &file);
if !picture_dir.exists() {
if !cli.dry_run {
create_dir_all(&picture_dir)
.unwrap_or_else(
|_| panic!("ERROR: cannot create directory at {}", &picture_dir.display())
);
} else {
eprintln!("Would mkdir {}", &picture_dir.display());
}
} else if !picture_dir.is_dir() {
panic!("ERROR: {} already exists and is not a directory", &picture_dir.display())
}
let picture_dir = find_backup_dir(&cli.backup_root, &datetime);
upsert_picture_directory(&picture_dir);
let filename_name = filename.file_name()
.unwrap_or_else(|| panic!("Error: Incorrect file name {}", filename.display()));
.ok_or_else(|| BackupFailure::IncorrectFilename(
format!("Incorrect file name {}", filename.display())))?;
let target_filename = picture_dir.join(filename_name);
if !target_filename.is_file() {
if !cli.dry_run {
copy(filename, &target_filename)
.unwrap_or_else(|_| panic!("ERROR: cannot copy {} to {}", &filename.display(), &target_filename.display()));
} else {
eprintln!("Would copy {} to {}", filename.display(), target_filename.display());
match copy(filename, &target_filename) {
Ok(_) => Ok(BackupSuccess::Backup(
target_filename.into_os_string().into_string().unwrap(),
origin)),
Err(_) => {
eprintln!("ERROR: cannot copy {} to {}", &filename.display(), &target_filename.display());
Err(BackupFailure::CopyError(String::from(file_path)))
}
}
} else if same_files(filename, &target_filename) {
eprintln!("File already archived: {}", &filename.display())
Ok(BackupSuccess::AlreadyBackup(String::from(file_path)))
} else {
panic!("ERROR: {} already exists in {}, but the two files are different",
&filename.display(),
&target_filename.display())
Err(BackupFailure::AlreadyBackupButDifferent(String::from(file_path)))
}
}
/// Makes sure `picture_dir` exists as a directory, creating it (and any
/// missing parent) when needed.
///
/// The existence check and the creation run while holding
/// `CREATE_DIR_MUTEX`, so parallel workers cannot race each other on the
/// same directory.
///
/// # Panics
///
/// Panics when the directory cannot be created, or when `picture_dir`
/// already exists but is not a directory.
fn upsert_picture_directory(picture_dir: &PathBuf) {
    // The guard must be bound to a named local: `let _ = mutex.lock()` drops
    // it at the end of the statement and leaves the section below unprotected.
    // This function panics while holding the lock, which poisons the mutex;
    // the protected data is `()`, so recovering the guard from a poisoned
    // lock is always sound.
    let _guard = CREATE_DIR_MUTEX
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    if !picture_dir.exists() {
        create_dir_all(picture_dir).unwrap_or_else(|e| {
            panic!(
                "ERROR: cannot create the backup directory {}: {}",
                picture_dir.display(),
                e
            )
        });
    } else if !picture_dir.is_dir() {
        panic!(
            "ERROR: {} already exists and is not a directory. Can't use it to store a picture.",
            picture_dir.display()
        )
    }
}
/// Retrieves when the picture has been shot from the EXIF metadata.
/// If no datetime EXIF data is attached to the file, use the file
/// last modification date.
fn get_picture_datetime(file_path: &str, file: &File) -> DateTime<Utc> {
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
let exif_datetime = get_picture_exif_datetime(file);
exif_datetime.unwrap_or_else(|| get_file_modified_time(file_path, file))
match exif_datetime {
Some(dt) => (dt, PictureDatetimeOrigin::Exif),
None => (get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata)
}
}
/// Retrieves the picture EXIF datetime.
@ -100,7 +168,6 @@ fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
/// If we cannot load the EXIF creation datetime, we end up using the
/// last modified time of the file.
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
eprintln!("No EXIF information available for {}, falling back to file mtime.", file_path);
let systemtime = file.metadata()
.unwrap_or_else(|_| panic!("Cannot retrieve UNIX file metadata for {}", file_path))
.modified()
@ -120,11 +187,17 @@ fn find_backup_dir(backup_root: &str, datetime: &DateTime<Utc>) -> PathBuf {
/// Sanity function making sure the user did not give us complete
/// garbage data.
fn validate_args(args: &CliArgs) {
if !Path::new(&args.file_path).is_file() {
panic!("ERROR: {} is not a file", &args.file_path);
match &args.file_path {
Some(file_path) => {
if !Path::new(&file_path).is_file() {
panic!("ERROR: {} is not a file", &file_path);
};
}
None => ()
};
if Path::new(&args.backup_root).is_file() {
panic!("ERROR: {} is a file, not a valid backup dir", &args.file_path);
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
};
}
@ -141,8 +214,5 @@ fn same_files(source: &Path, target: &Path) -> bool {
.unwrap_or_else(|_| panic!("Error: cannot open file {}", &target.display()))
.metadata()
.unwrap_or_else(|_| panic!("Error: cannot get metadata of file {}", &target.display()));
let source_modified = source_file.modified().unwrap_or_else(|_| panic!("ERROR: cannot find created datetime for {}", &source.display()));
let target_modified = target_file.modified().unwrap_or_else(|_| panic!("ERROR: cannot find created datetime for {}", &target.display()));
source_file.len() == target_file.len() && source_modified == target_modified
source_file.len() == target_file.len()
}