Use exiftool to parse more metadata when needed

While the EXIF crate is pretty good when it comes to parsing JPEG and
PNG metadata, it does not support non-EXIF metadata such as MOV
metadata.

I'd like to organise my videos like my pictures. I did not find a
Rust-native swiss army knife library. So I sadly decided to shell out
metadata parsing to the exiftool program if the exif crate does not
manage to parse the picture metadata.

Despite its name, this program can parse a wide range of picture/video
shooting metadata outside of EXIF. That's really good.

If for some reason exiftool fails to parse the file creation time, we
fall back to the Unix file datetime.
This commit is contained in:
Félix Baylac Jacqué 2023-09-17 12:09:03 +02:00
parent d3b7bea53b
commit cb40dda7e9
4 changed files with 108 additions and 6 deletions

45
Cargo.lock generated
View file

@ -325,6 +325,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "js-sys"
version = "0.3.64"
@ -422,6 +428,8 @@ dependencies = [
"indicatif",
"kamadak-exif",
"rayon",
"serde",
"serde_json",
]
[[package]]
@ -483,12 +491,49 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "strsim"
version = "0.10.0"

View file

@ -12,6 +12,8 @@ clap = { version = "4.3.24", features = ["derive"] }
indicatif = { version = "0.17.6", features = ["rayon"] }
kamadak-exif = "0.5.5"
rayon = "1.7.0"
serde = { version = "1.0.188", features = ["derive"] }
serde_json = "1.0.107"
[profile.release]
strip = true

View file

@ -4,8 +4,14 @@ pkgs.rustPlatform.buildRustPackage {
pname = "picobak";
version = "0.1.0";
src = lib.cleanSource ./.;
cargoHash = "sha256-gytrsYdL9WuxJDZBaK/w+1KLmAKKBD711efHTzQqs4o=";
cargoHash = "sha256-W0SLjlrqONMdTXoOlMilEvza2WEIVaKUJRraGR//qsw=";
meta = {
description = "Backup and organize your pictures library";
};
nativeBuildInputs = [ pkgs.makeWrapper ];
# Inject exiftool
postInstall = ''
wrapProgram $out/bin/picobak \
--prefix PATH : "${lib.makeBinPath [pkgs.exiftool]}"
'';
}

View file

@ -1,4 +1,5 @@
use std::fs::{create_dir_all, copy};
use std::process::{Command};
use std::{fs::File, path::PathBuf};
use std::path::Path;
use std::fmt;
@ -9,6 +10,7 @@ use exif::{Tag, In, Value};
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
use indicatif::ParallelProgressIterator;
use rayon::prelude::*;
use serde::Deserialize;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
@ -20,6 +22,13 @@ struct CliArgs {
file_path: Option<String>,
}
/// One element of the JSON array printed by `exiftool -j`.
///
/// Only the `CreateDate` tag is deserialized; the field is `None` when
/// the tag is missing from the exiftool output.
#[derive(Debug, Deserialize)]
struct ExifToolEntry {
    /// Raw `CreateDate` value, exactly as printed by exiftool.
    #[serde(rename = "CreateDate")]
    create_date: Option<String>,
}
enum BackupSuccess {
AlreadyBackup(String),
Backup(String, PictureDatetimeOrigin)
@ -33,6 +42,7 @@ enum BackupFailure {
/// Source the datetime of a backed-up file was obtained from.
enum PictureDatetimeOrigin {
/// The datetime was read from the EXIF metadata.
Exif,
/// The datetime was obtained by shelling out to the exiftool program.
ExifTool,
/// The datetime is the file modification time.
FilesystemMetadata
}
@ -76,6 +86,7 @@ fn main() {
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
let mut nb_copy_exif: u32 = 0;
let mut nb_copy_exiftool: u32 = 0;
let mut nb_copy_filesystem: u32 = 0;
let mut nb_duplicates: u32 = 0;
let mut failures: Vec<BackupFailure> = Vec::new();
@ -85,6 +96,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1,
BackupSuccess::Backup(_, origin) => match origin {
PictureDatetimeOrigin::Exif => nb_copy_exif +=1,
PictureDatetimeOrigin::ExifTool => nb_copy_exiftool +=1,
PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1
}
}
@ -97,6 +109,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
// Every successful copy counts towards the total, whichever source
// (EXIF, exiftool or filesystem) provided the datetime.
eprintln!("Copied: {}", nb_copy_exif + nb_copy_exiftool + nb_copy_filesystem);
eprintln!("To classify these newly copied files, we used:");
eprintln!(" {}: EXIF metadata", nb_copy_exif);
eprintln!(" {}: the exiftool program", nb_copy_exiftool);
eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
eprintln!("Failures: {}", failures.len());
if failures.len() != 0 {
@ -153,11 +166,14 @@ fn upsert_picture_directory(picture_dir: &PathBuf) {
/// The datetime is looked up in the EXIF metadata first, then through
/// the exiftool program and, as a last resort, in the file last
/// modification date. The returned origin tells which source was used.
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
    // Try the exif crate first.
    get_picture_exif_datetime(file)
        .map(|dt| (dt, PictureDatetimeOrigin::Exif))
        // Exif failed, shell out to exiftool.
        .or_else(|| {
            get_picture_exiftool_datetime(file_path)
                .map(|dt| (dt, PictureDatetimeOrigin::ExifTool))
        })
        // Exiftool failed as well. Fall back to the filesystem datetime,
        // computed lazily so it is only looked up when actually needed.
        .unwrap_or_else(|| {
            (
                get_file_modified_time(file_path, file),
                PictureDatetimeOrigin::FilesystemMetadata,
            )
        })
}
/// Retrieves the picture EXIF datetime.
@ -178,6 +194,29 @@ fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
}
}
/// Shells out to the exiftool CLI. Despite its name, exiftool parses
/// much more metadata than EXIF, such as MOV metadata.
///
/// Runs `exiftool -j -P -CreateDate <file_path>` and reads the
/// `CreateDate` tag from its JSON output, interpreting it as UTC.
///
/// Returns `None` when exiftool cannot be spawned, exits with a
/// failure status, prints something that is not a one-element JSON
/// array, omits the tag, or reports a date that does not match the
/// `%Y:%m:%d %H:%M:%S` format.
fn get_picture_exiftool_datetime(file_path: &str) -> Option<DateTime<Utc>> {
    let output = Command::new("exiftool")
        .args(["-j", "-P", "-CreateDate", file_path])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    // Parse the captured bytes directly: serde_json validates the
    // UTF-8 itself, so no separate conversion to a String is needed.
    let entries: Vec<ExifToolEntry> = serde_json::from_slice(&output.stdout).ok()?;
    // A single file was requested, so exactly one entry is expected.
    match entries.as_slice() {
        [entry] => {
            let date = entry.create_date.as_deref()?;
            NaiveDateTime::parse_from_str(date, "%Y:%m:%d %H:%M:%S")
                .map(|naive_datetime| DateTime::from_utc(naive_datetime, Utc))
                .ok()
        }
        _ => None,
    }
}
/// If we cannot load the EXIF creation datetime, we end up using the
/// last modified time of the file.
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
@ -212,6 +251,16 @@ fn validate_args(args: &CliArgs) {
if Path::new(&args.backup_root).is_file() {
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
};
// Probe for exiftool by running it directly (`-ver` only prints its
// version), so no shell is required. A spawn failure means the binary
// is not reachable through $PATH; since exiftool is optional, that
// results in a warning rather than a panic.
let exif_tool_in_path = Command::new("exiftool")
    .arg("-ver")
    .output()
    .map(|output| output.status.success())
    .unwrap_or(false);
if !exif_tool_in_path {
    eprintln!("Exiftool doesn't seem to be present in $PATH. Install it if you want to be able to extract more pictures metadata");
}
}
/// Compare two files and check if they're the same. We're not really