Use exiftool to parse more metadata when needed
While the exif crate is pretty good when it comes to parsing JPEG and PNG metadata, it does not support non-EXIF metadata such as MOV metadata. I'd like to organise my videos like my pictures. I did not find a Rust-native Swiss Army knife library, so I sadly decided to shell out metadata parsing to the exiftool program whenever the exif crate does not manage to parse the picture metadata. Despite its name, this program can parse a wide range of picture/video shooting metadata beyond EXIF. That's really good. If for some reason exiftool fails to parse the file creation time, we fall back to the Unix file datetime.
This commit is contained in:
parent
d3b7bea53b
commit
cb40dda7e9
45
Cargo.lock
generated
45
Cargo.lock
generated
|
@ -325,6 +325,12 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
|
@ -422,6 +428,8 @@ dependencies = [
|
|||
"indicatif",
|
||||
"kamadak-exif",
|
||||
"rayon",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -483,12 +491,49 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.188"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.188"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.107"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
|
|
|
@ -12,6 +12,8 @@ clap = { version = "4.3.24", features = ["derive"] }
|
|||
indicatif = { version = "0.17.6", features = ["rayon"] }
|
||||
kamadak-exif = "0.5.5"
|
||||
rayon = "1.7.0"
|
||||
serde = { version = "1.0.188", features = ["derive"] }
|
||||
serde_json = "1.0.107"
|
||||
|
||||
[profile.release]
|
||||
strip = true
|
||||
|
|
|
@ -4,8 +4,14 @@ pkgs.rustPlatform.buildRustPackage {
|
|||
pname = "picobak";
|
||||
version = "0.1.0";
|
||||
src = lib.cleanSource ./.;
|
||||
cargoHash = "sha256-gytrsYdL9WuxJDZBaK/w+1KLmAKKBD711efHTzQqs4o=";
|
||||
cargoHash = "sha256-W0SLjlrqONMdTXoOlMilEvza2WEIVaKUJRraGR//qsw=";
|
||||
meta = {
|
||||
description = "Backup and organize your pictures library";
|
||||
};
|
||||
nativeBuildInputs = [ pkgs.makeWrapper ];
|
||||
# Inject exiftool
|
||||
postInstall = ''
|
||||
wrapProgram $out/bin/picobak \
|
||||
--prefix PATH : "${lib.makeBinPath [pkgs.exiftool]}"
|
||||
'';
|
||||
}
|
||||
|
|
59
src/main.rs
59
src/main.rs
|
@ -1,4 +1,5 @@
|
|||
use std::fs::{create_dir_all, copy};
|
||||
use std::process::{Command};
|
||||
use std::{fs::File, path::PathBuf};
|
||||
use std::path::Path;
|
||||
use std::fmt;
|
||||
|
@ -9,6 +10,7 @@ use exif::{Tag, In, Value};
|
|||
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
|
||||
use indicatif::ParallelProgressIterator;
|
||||
use rayon::prelude::*;
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
|
@ -20,6 +22,13 @@ struct CliArgs {
|
|||
file_path: Option<String>,
|
||||
}
|
||||
|
||||
/// Structure used to parse the JSON output of the exiftool program.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ExifToolEntry {
|
||||
#[serde(rename(deserialize = "CreateDate"))]
|
||||
create_date: Option<String>
|
||||
}
|
||||
|
||||
enum BackupSuccess {
|
||||
AlreadyBackup(String),
|
||||
Backup(String, PictureDatetimeOrigin)
|
||||
|
@ -33,6 +42,7 @@ enum BackupFailure {
|
|||
|
||||
/// Records which source provided the datetime used to classify a file.
enum PictureDatetimeOrigin {
    /// The datetime was read in-process through the exif crate.
    Exif,
    /// The datetime was obtained by shelling out to the exiftool program.
    ExifTool,
    /// Neither EXIF source gave a datetime; the file's last modified
    /// time was used instead.
    FilesystemMetadata,
}
|
||||
|
||||
|
@ -76,6 +86,7 @@ fn main() {
|
|||
|
||||
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
||||
let mut nb_copy_exif: u32 = 0;
|
||||
let mut nb_copy_exiftool: u32 = 0;
|
||||
let mut nb_copy_filesystem: u32 = 0;
|
||||
let mut nb_duplicates: u32 = 0;
|
||||
let mut failures: Vec<BackupFailure> = Vec::new();
|
||||
|
@ -85,6 +96,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
|||
BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1,
|
||||
BackupSuccess::Backup(_, origin) => match origin {
|
||||
PictureDatetimeOrigin::Exif => nb_copy_exif +=1,
|
||||
PictureDatetimeOrigin::ExifTool => nb_copy_exiftool +=1,
|
||||
PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1
|
||||
}
|
||||
}
|
||||
|
@ -97,6 +109,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
|||
eprintln!("Copied: {}", nb_copy_exif + nb_copy_filesystem);
|
||||
eprintln!("To classify these newly copied files, we used:");
|
||||
eprintln!(" {}: EXIF metadata", nb_copy_exif);
|
||||
eprintln!(" {}: the exiftool program", nb_copy_exiftool);
|
||||
eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
|
||||
eprintln!("Failures: {}", failures.len());
|
||||
if failures.len() != 0 {
|
||||
|
@ -153,11 +166,14 @@ fn upsert_picture_directory(picture_dir: &PathBuf) {
|
|||
/// If no datetime EXIF data is attached to the file, use the file
|
||||
/// last modification date.
|
||||
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
|
||||
let exif_datetime = get_picture_exif_datetime(file);
|
||||
match exif_datetime {
|
||||
Some(dt) => (dt, PictureDatetimeOrigin::Exif),
|
||||
None => (get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata)
|
||||
}
|
||||
// Try exif crate.
|
||||
get_picture_exif_datetime(file).map(|dt| (dt, PictureDatetimeOrigin::Exif))
|
||||
// Exif failed, shell out to exiftool.
|
||||
.or_else(|| {
|
||||
get_picture_exiftool_datetime(file_path)
|
||||
.map(|dt| (dt, PictureDatetimeOrigin::ExifTool))})
|
||||
// Exiftool failed as well. Fallback to Unix datetime.
|
||||
.unwrap_or((get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata))
|
||||
}
|
||||
|
||||
/// Retrieves the picture EXIF datetime.
|
||||
|
@ -178,6 +194,29 @@ fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Shells out to the exiftool CLI. Despite its name, exiftool parses
|
||||
/// much more metadata than exif. Such as MOV metadata.
|
||||
fn get_picture_exiftool_datetime(file_path: &str) -> Option<DateTime<Utc>> {
|
||||
let output = Command::new("exiftool")
|
||||
.args(["-j", "-P", "-CreateDate", file_path])
|
||||
.output()
|
||||
.ok()?;
|
||||
if !output.status.success() {
|
||||
return None
|
||||
}
|
||||
let stdout_str = String::from_utf8(output.stdout).ok()?;
|
||||
let parsed_output: Vec<ExifToolEntry> = serde_json::from_str(&stdout_str).ok()?;
|
||||
if parsed_output.len() != 1 {
|
||||
None
|
||||
} else {
|
||||
let entry = parsed_output.get(0)?;
|
||||
let date: &str = &entry.create_date.as_ref()?;
|
||||
NaiveDateTime::parse_from_str(date, "%Y:%m:%d %H:%M:%S")
|
||||
.map(|naive_datetime| DateTime::from_utc(naive_datetime, Utc))
|
||||
.ok()
|
||||
}
|
||||
}
|
||||
|
||||
/// If we cannot load the EXIF creation datetime, we end up using the
|
||||
/// last modified time of the file.
|
||||
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
|
||||
|
@ -212,6 +251,16 @@ fn validate_args(args: &CliArgs) {
|
|||
if Path::new(&args.backup_root).is_file() {
|
||||
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
|
||||
};
|
||||
|
||||
let exif_tool_in_path = Command::new("bash")
|
||||
.args(["-c", "command exiftool"])
|
||||
.output()
|
||||
.ok()
|
||||
.map(|e| e.status.success())
|
||||
.unwrap();
|
||||
if !exif_tool_in_path {
|
||||
eprintln!("Exiftool doesn't seem to be present in $PATH. Install it if you want to be able to extract more pictures metadata");
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare two files and check if they're the same. We're not really
|
||||
|
|
Loading…
Reference in a new issue