Use exiftool to parse more metadata when needed

While the EXIF crate is pretty good when it comes to parsing JPEG and
PNG metadata, it does not support non-EXIF metadata such as MOV
metadata.

I'd like to organise my videos like my pictures. I did not find a
Rust-native Swiss Army knife library, so I sadly decided to shell out
metadata parsing to the exiftool program when the exif crate does not
manage to parse the picture metadata.

Despite its name, this program can parse a wide range of picture/video
shooting metadata outside of EXIF. That's really good.

If for some reason exiftool fails to parse the file creation time, we
fall back to the file's last modification time.
This commit is contained in:
Félix Baylac Jacqué 2023-09-17 12:09:03 +02:00
parent d3b7bea53b
commit cb40dda7e9
4 changed files with 108 additions and 6 deletions

45
Cargo.lock generated
View File

@ -325,6 +325,12 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.64" version = "0.3.64"
@ -422,6 +428,8 @@ dependencies = [
"indicatif", "indicatif",
"kamadak-exif", "kamadak-exif",
"rayon", "rayon",
"serde",
"serde_json",
] ]
[[package]] [[package]]
@ -483,12 +491,49 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.2.0" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"

View File

@ -12,6 +12,8 @@ clap = { version = "4.3.24", features = ["derive"] }
indicatif = { version = "0.17.6", features = ["rayon"] } indicatif = { version = "0.17.6", features = ["rayon"] }
kamadak-exif = "0.5.5" kamadak-exif = "0.5.5"
rayon = "1.7.0" rayon = "1.7.0"
serde = { version = "1.0.188", features = ["derive"] }
serde_json = "1.0.107"
[profile.release] [profile.release]
strip = true strip = true

View File

@ -4,8 +4,14 @@ pkgs.rustPlatform.buildRustPackage {
pname = "picobak"; pname = "picobak";
version = "0.1.0"; version = "0.1.0";
src = lib.cleanSource ./.; src = lib.cleanSource ./.;
cargoHash = "sha256-gytrsYdL9WuxJDZBaK/w+1KLmAKKBD711efHTzQqs4o="; cargoHash = "sha256-W0SLjlrqONMdTXoOlMilEvza2WEIVaKUJRraGR//qsw=";
meta = { meta = {
description = "Backup and organize your pictures library"; description = "Backup and organize your pictures library";
}; };
nativeBuildInputs = [ pkgs.makeWrapper ];
# Inject exiftool
postInstall = ''
wrapProgram $out/bin/picobak \
--prefix PATH : "${lib.makeBinPath [pkgs.exiftool]}"
'';
} }

View File

@ -1,4 +1,5 @@
use std::fs::{create_dir_all, copy}; use std::fs::{create_dir_all, copy};
use std::process::{Command};
use std::{fs::File, path::PathBuf}; use std::{fs::File, path::PathBuf};
use std::path::Path; use std::path::Path;
use std::fmt; use std::fmt;
@ -9,6 +10,7 @@ use exif::{Tag, In, Value};
use chrono::{Utc, DateTime, Datelike, NaiveDateTime}; use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
use indicatif::ParallelProgressIterator; use indicatif::ParallelProgressIterator;
use rayon::prelude::*; use rayon::prelude::*;
use serde::Deserialize;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)] #[command(author, version, about, long_about = None)]
@ -20,6 +22,13 @@ struct CliArgs {
file_path: Option<String>, file_path: Option<String>,
} }
/// One element of the JSON array printed by the exiftool program.
///
/// Only the `CreateDate` key is mapped to a field.
#[derive(Debug, Deserialize)]
struct ExifToolEntry {
    /// Raw `CreateDate` string as reported by exiftool, if any.
    #[serde(rename = "CreateDate")]
    create_date: Option<String>,
}
enum BackupSuccess { enum BackupSuccess {
AlreadyBackup(String), AlreadyBackup(String),
Backup(String, PictureDatetimeOrigin) Backup(String, PictureDatetimeOrigin)
@ -33,6 +42,7 @@ enum BackupFailure {
enum PictureDatetimeOrigin { enum PictureDatetimeOrigin {
Exif, Exif,
ExifTool,
FilesystemMetadata FilesystemMetadata
} }
@ -76,6 +86,7 @@ fn main() {
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) { fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
let mut nb_copy_exif: u32 = 0; let mut nb_copy_exif: u32 = 0;
let mut nb_copy_exiftool: u32 = 0;
let mut nb_copy_filesystem: u32 = 0; let mut nb_copy_filesystem: u32 = 0;
let mut nb_duplicates: u32 = 0; let mut nb_duplicates: u32 = 0;
let mut failures: Vec<BackupFailure> = Vec::new(); let mut failures: Vec<BackupFailure> = Vec::new();
@ -85,6 +96,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1, BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1,
BackupSuccess::Backup(_, origin) => match origin { BackupSuccess::Backup(_, origin) => match origin {
PictureDatetimeOrigin::Exif => nb_copy_exif +=1, PictureDatetimeOrigin::Exif => nb_copy_exif +=1,
PictureDatetimeOrigin::ExifTool => nb_copy_exiftool +=1,
PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1 PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1
} }
} }
@ -97,6 +109,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
eprintln!("Copied: {}", nb_copy_exif + nb_copy_filesystem); eprintln!("Copied: {}", nb_copy_exif + nb_copy_filesystem);
eprintln!("To classify these newly copied files, we used:"); eprintln!("To classify these newly copied files, we used:");
eprintln!(" {}: EXIF metadata", nb_copy_exif); eprintln!(" {}: EXIF metadata", nb_copy_exif);
eprintln!(" {}: the exiftool program", nb_copy_exiftool);
eprintln!(" {}: filesystem metadata", nb_copy_filesystem); eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
eprintln!("Failures: {}", failures.len()); eprintln!("Failures: {}", failures.len());
if failures.len() != 0 { if failures.len() != 0 {
@ -153,11 +166,14 @@ fn upsert_picture_directory(picture_dir: &PathBuf) {
/// If no datetime EXIF data is attached to the file, use the file /// If no datetime EXIF data is attached to the file, use the file
/// last modification date. /// last modification date.
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) { fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
let exif_datetime = get_picture_exif_datetime(file); // Try exif crate.
match exif_datetime { get_picture_exif_datetime(file).map(|dt| (dt, PictureDatetimeOrigin::Exif))
Some(dt) => (dt, PictureDatetimeOrigin::Exif), // Exif failed, shell out to exiftool.
None => (get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata) .or_else(|| {
} get_picture_exiftool_datetime(file_path)
.map(|dt| (dt, PictureDatetimeOrigin::ExifTool))})
// Exiftool failed as well. Fallback to Unix datetime.
.unwrap_or((get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata))
} }
/// Retrieves the picture EXIF datetime. /// Retrieves the picture EXIF datetime.
@ -178,6 +194,29 @@ fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
} }
} }
/// Asks the external exiftool CLI for the `CreateDate` of `file_path`.
///
/// Despite its name, exiftool parses much more than EXIF, MOV metadata
/// for instance.
///
/// Returns `None` when exiftool cannot be run, exits unsuccessfully,
/// prints something other than a one-element JSON array, reports no
/// `CreateDate`, or reports one that does not match
/// `%Y:%m:%d %H:%M:%S`. The parsed value is interpreted as UTC.
fn get_picture_exiftool_datetime(file_path: &str) -> Option<DateTime<Utc>> {
    let exiftool_run = Command::new("exiftool")
        .arg("-j")
        .arg("-P")
        .arg("-CreateDate")
        .arg(file_path)
        .output()
        .ok()
        .filter(|run| run.status.success())?;
    let json = String::from_utf8(exiftool_run.stdout).ok()?;
    let entries: Vec<ExifToolEntry> = serde_json::from_str(&json).ok()?;
    // A single input file is expected to yield exactly one JSON entry;
    // anything else is treated as a failure.
    match entries.as_slice() {
        [entry] => {
            let raw_date = entry.create_date.as_deref()?;
            NaiveDateTime::parse_from_str(raw_date, "%Y:%m:%d %H:%M:%S")
                .ok()
                .map(|naive| DateTime::from_utc(naive, Utc))
        }
        _ => None,
    }
}
/// If we cannot load the EXIF creation datetime, we end up using the /// If we cannot load the EXIF creation datetime, we end up using the
/// last modified time of the file. /// last modified time of the file.
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> { fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
@ -212,6 +251,16 @@ fn validate_args(args: &CliArgs) {
if Path::new(&args.backup_root).is_file() { if Path::new(&args.backup_root).is_file() {
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root); panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
}; };
let exif_tool_in_path = Command::new("bash")
.args(["-c", "command exiftool"])
.output()
.ok()
.map(|e| e.status.success())
.unwrap();
if !exif_tool_in_path {
eprintln!("Exiftool doesn't seem to be present in $PATH. Install it if you want to be able to extract more pictures metadata");
}
} }
/// Compare two files and check if they're the same. We're not really /// Compare two files and check if they're the same. We're not really