Use exiftool to parse more metadata when needed
While the EXIF crate is pretty good when it comes to parsing JPEG and PNG metadata, it does not support non-EXIF metadata such as MOV metadata. I'd like to organise my videos like my pictures. I did not find a Rust-native Swiss Army knife library, so I sadly decided to shell out metadata parsing to the exiftool program if the exif crate does not manage to parse the picture metadata. Despite its name, this program can parse a wide range of picture/video shooting metadata outside of EXIF. That's really good. If for some reason exiftool fails to parse the file creation time, we fall back to the Unix file datetime.
This commit is contained in:
parent
d3b7bea53b
commit
cb40dda7e9
|
@ -325,6 +325,12 @@ dependencies = [
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "js-sys"
|
name = "js-sys"
|
||||||
version = "0.3.64"
|
version = "0.3.64"
|
||||||
|
@ -422,6 +428,8 @@ dependencies = [
|
||||||
"indicatif",
|
"indicatif",
|
||||||
"kamadak-exif",
|
"kamadak-exif",
|
||||||
"rayon",
|
"rayon",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -483,12 +491,49 @@ dependencies = [
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ryu"
|
||||||
|
version = "1.0.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scopeguard"
|
name = "scopeguard"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.188"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.188"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.107"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
|
|
@ -12,6 +12,8 @@ clap = { version = "4.3.24", features = ["derive"] }
|
||||||
indicatif = { version = "0.17.6", features = ["rayon"] }
|
indicatif = { version = "0.17.6", features = ["rayon"] }
|
||||||
kamadak-exif = "0.5.5"
|
kamadak-exif = "0.5.5"
|
||||||
rayon = "1.7.0"
|
rayon = "1.7.0"
|
||||||
|
serde = { version = "1.0.188", features = ["derive"] }
|
||||||
|
serde_json = "1.0.107"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
strip = true
|
strip = true
|
||||||
|
|
|
@ -4,8 +4,14 @@ pkgs.rustPlatform.buildRustPackage {
|
||||||
pname = "picobak";
|
pname = "picobak";
|
||||||
version = "0.1.0";
|
version = "0.1.0";
|
||||||
src = lib.cleanSource ./.;
|
src = lib.cleanSource ./.;
|
||||||
cargoHash = "sha256-gytrsYdL9WuxJDZBaK/w+1KLmAKKBD711efHTzQqs4o=";
|
cargoHash = "sha256-W0SLjlrqONMdTXoOlMilEvza2WEIVaKUJRraGR//qsw=";
|
||||||
meta = {
|
meta = {
|
||||||
description = "Backup and organize your pictures library";
|
description = "Backup and organize your pictures library";
|
||||||
};
|
};
|
||||||
|
nativeBuildInputs = [ pkgs.makeWrapper ];
|
||||||
|
# Inject exiftool
|
||||||
|
postInstall = ''
|
||||||
|
wrapProgram $out/bin/picobak \
|
||||||
|
--prefix PATH : "${lib.makeBinPath [pkgs.exiftool]}"
|
||||||
|
'';
|
||||||
}
|
}
|
||||||
|
|
59
src/main.rs
59
src/main.rs
|
@ -1,4 +1,5 @@
|
||||||
use std::fs::{create_dir_all, copy};
|
use std::fs::{create_dir_all, copy};
|
||||||
|
use std::process::{Command};
|
||||||
use std::{fs::File, path::PathBuf};
|
use std::{fs::File, path::PathBuf};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
@ -9,6 +10,7 @@ use exif::{Tag, In, Value};
|
||||||
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
|
use chrono::{Utc, DateTime, Datelike, NaiveDateTime};
|
||||||
use indicatif::ParallelProgressIterator;
|
use indicatif::ParallelProgressIterator;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
|
@ -20,6 +22,13 @@ struct CliArgs {
|
||||||
file_path: Option<String>,
|
file_path: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Structure used to parse the JSON output of the exiftool program.
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct ExifToolEntry {
|
||||||
|
#[serde(rename(deserialize = "CreateDate"))]
|
||||||
|
create_date: Option<String>
|
||||||
|
}
|
||||||
|
|
||||||
enum BackupSuccess {
|
enum BackupSuccess {
|
||||||
AlreadyBackup(String),
|
AlreadyBackup(String),
|
||||||
Backup(String, PictureDatetimeOrigin)
|
Backup(String, PictureDatetimeOrigin)
|
||||||
|
@ -33,6 +42,7 @@ enum BackupFailure {
|
||||||
|
|
||||||
enum PictureDatetimeOrigin {
|
enum PictureDatetimeOrigin {
|
||||||
Exif,
|
Exif,
|
||||||
|
ExifTool,
|
||||||
FilesystemMetadata
|
FilesystemMetadata
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,6 +86,7 @@ fn main() {
|
||||||
|
|
||||||
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
||||||
let mut nb_copy_exif: u32 = 0;
|
let mut nb_copy_exif: u32 = 0;
|
||||||
|
let mut nb_copy_exiftool: u32 = 0;
|
||||||
let mut nb_copy_filesystem: u32 = 0;
|
let mut nb_copy_filesystem: u32 = 0;
|
||||||
let mut nb_duplicates: u32 = 0;
|
let mut nb_duplicates: u32 = 0;
|
||||||
let mut failures: Vec<BackupFailure> = Vec::new();
|
let mut failures: Vec<BackupFailure> = Vec::new();
|
||||||
|
@ -85,6 +96,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
||||||
BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1,
|
BackupSuccess::AlreadyBackup(_) => nb_duplicates +=1,
|
||||||
BackupSuccess::Backup(_, origin) => match origin {
|
BackupSuccess::Backup(_, origin) => match origin {
|
||||||
PictureDatetimeOrigin::Exif => nb_copy_exif +=1,
|
PictureDatetimeOrigin::Exif => nb_copy_exif +=1,
|
||||||
|
PictureDatetimeOrigin::ExifTool => nb_copy_exiftool +=1,
|
||||||
PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1
|
PictureDatetimeOrigin::FilesystemMetadata => nb_copy_filesystem +=1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,6 +109,7 @@ fn display_backup_result(results: Vec<Result<BackupSuccess, BackupFailure>>) {
|
||||||
eprintln!("Copied: {}", nb_copy_exif + nb_copy_filesystem);
|
eprintln!("Copied: {}", nb_copy_exif + nb_copy_filesystem);
|
||||||
eprintln!("To classify these newly copied files, we used:");
|
eprintln!("To classify these newly copied files, we used:");
|
||||||
eprintln!(" {}: EXIF metadata", nb_copy_exif);
|
eprintln!(" {}: EXIF metadata", nb_copy_exif);
|
||||||
|
eprintln!(" {}: the exiftool program", nb_copy_exiftool);
|
||||||
eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
|
eprintln!(" {}: filesystem metadata", nb_copy_filesystem);
|
||||||
eprintln!("Failures: {}", failures.len());
|
eprintln!("Failures: {}", failures.len());
|
||||||
if failures.len() != 0 {
|
if failures.len() != 0 {
|
||||||
|
@ -153,11 +166,14 @@ fn upsert_picture_directory(picture_dir: &PathBuf) {
|
||||||
/// If no datetime EXIF data is attached to the file, use the file
|
/// If no datetime EXIF data is attached to the file, use the file
|
||||||
/// last modification date.
|
/// last modification date.
|
||||||
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
|
fn get_picture_datetime(file_path: &str, file: &File) -> (DateTime<Utc>, PictureDatetimeOrigin) {
|
||||||
let exif_datetime = get_picture_exif_datetime(file);
|
// Try exif crate.
|
||||||
match exif_datetime {
|
get_picture_exif_datetime(file).map(|dt| (dt, PictureDatetimeOrigin::Exif))
|
||||||
Some(dt) => (dt, PictureDatetimeOrigin::Exif),
|
// Exif failed, shell out to exiftool.
|
||||||
None => (get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata)
|
.or_else(|| {
|
||||||
}
|
get_picture_exiftool_datetime(file_path)
|
||||||
|
.map(|dt| (dt, PictureDatetimeOrigin::ExifTool))})
|
||||||
|
// Exiftool failed as well. Fallback to Unix datetime.
|
||||||
|
.unwrap_or((get_file_modified_time(file_path, file), PictureDatetimeOrigin::FilesystemMetadata))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieves the picture EXIF datetime.
|
/// Retrieves the picture EXIF datetime.
|
||||||
|
@ -178,6 +194,29 @@ fn get_picture_exif_datetime(file: &File) -> Option<DateTime<Utc>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shells out to the exiftool CLI. Despite its name, exiftool parses
|
||||||
|
/// much more metadata than exif. Such as MOV metadata.
|
||||||
|
fn get_picture_exiftool_datetime(file_path: &str) -> Option<DateTime<Utc>> {
|
||||||
|
let output = Command::new("exiftool")
|
||||||
|
.args(["-j", "-P", "-CreateDate", file_path])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
if !output.status.success() {
|
||||||
|
return None
|
||||||
|
}
|
||||||
|
let stdout_str = String::from_utf8(output.stdout).ok()?;
|
||||||
|
let parsed_output: Vec<ExifToolEntry> = serde_json::from_str(&stdout_str).ok()?;
|
||||||
|
if parsed_output.len() != 1 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let entry = parsed_output.get(0)?;
|
||||||
|
let date: &str = &entry.create_date.as_ref()?;
|
||||||
|
NaiveDateTime::parse_from_str(date, "%Y:%m:%d %H:%M:%S")
|
||||||
|
.map(|naive_datetime| DateTime::from_utc(naive_datetime, Utc))
|
||||||
|
.ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// If we cannot load the EXIF creation datetime, we end up using the
|
/// If we cannot load the EXIF creation datetime, we end up using the
|
||||||
/// last modified time of the file.
|
/// last modified time of the file.
|
||||||
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
|
fn get_file_modified_time(file_path: &str, file: &File) -> DateTime<Utc> {
|
||||||
|
@ -212,6 +251,16 @@ fn validate_args(args: &CliArgs) {
|
||||||
if Path::new(&args.backup_root).is_file() {
|
if Path::new(&args.backup_root).is_file() {
|
||||||
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
|
panic!("ERROR: {} is a file, not a valid backup dir", &args.backup_root);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let exif_tool_in_path = Command::new("bash")
|
||||||
|
.args(["-c", "command exiftool"])
|
||||||
|
.output()
|
||||||
|
.ok()
|
||||||
|
.map(|e| e.status.success())
|
||||||
|
.unwrap();
|
||||||
|
if !exif_tool_in_path {
|
||||||
|
eprintln!("Exiftool doesn't seem to be present in $PATH. Install it if you want to be able to extract more pictures metadata");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compare two files and check if they're the same. We're not really
|
/// Compare two files and check if they're the same. We're not really
|
||||||
|
|
Loading…
Reference in New Issue