Skip to content

Commit e8d95d1

Browse files
teh-hippoCopilot
andcommitted
feat: change manifest PK from local_path to asset_resource
Replace PK (asset_id, zone_id, local_path) with (asset_id, zone_id, asset_resource) where asset_resource is the Apple iCloud resource field prefix (e.g. resOriginal, resOriginalVidCompl). This makes local_path an updateable column, so config changes like toggling --keep-unicode-in-filenames update the existing manifest row instead of creating a duplicate. - Add asset_resource column (schema v3) - Add version_to_resource() mapping in version_size.py - Update all manifest.lookup/upsert/remove calls in base.py - Migration: v0/v1/v2 DBs get asset_resource via ALTER + table rebuild - 7 new unit tests for asset_resource PK behaviour Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 2635da7 commit e8d95d1

File tree

6 files changed

+236
-57
lines changed

6 files changed

+236
-57
lines changed

src/icloudpd/base.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
size_to_suffix,
9393
store_password_in_keyring,
9494
)
95-
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize
95+
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize, version_to_resource
9696

9797
freeze_support() # fmt: skip # fixing tqdm on macos
9898

@@ -128,6 +128,7 @@ def _check_collision(
128128
directory: str,
129129
dir_cache: "DirCache",
130130
logger: Logger,
131+
asset_resource: str = "resOriginal",
131132
) -> tuple[str, str, bool, str | None]:
132133
"""Check if another asset owns this path and self-heal if needed.
133134
@@ -140,7 +141,7 @@ def _check_collision(
140141
dedup_suffix = f"_{suffix}"
141142
new_download_path = add_suffix_to_filename(dedup_suffix, download_path)
142143
new_rel_path = os.path.relpath(new_download_path, directory)
143-
manifest.update_path(photo_id, manifest.zone_id, rel_path, new_rel_path)
144+
manifest.update_path(photo_id, manifest.zone_id, asset_resource, new_rel_path)
144145
file_exists = dir_cache.isfile(new_download_path)
145146
logger.debug(
146147
"Collision on %s (owned by %s), deduping %s to %s",
@@ -874,8 +875,9 @@ def _write_exif_with_mtime_check(
874875

875876
wrote = write_file_metadata(file_path, update, set(write_metadata_exif), dry_run)
876877

877-
# Record mtime after successful write so we can skip next run
878-
if wrote and not dry_run and manifest is not None:
878+
# Record mtime so we can skip exiftool on subsequent runs.
879+
# Set after both successful writes AND confirmed no-ops (metadata already correct).
880+
if not dry_run and manifest is not None:
879881
try:
880882
mtime = os.stat(file_path).st_mtime
881883
manifest.update_file_mtime(photo.id, manifest.zone_id, asset_resource, mtime)
@@ -1000,16 +1002,18 @@ def download_builder(
10001002
# Compute relative path for manifest — use the actual file path on disk
10011003
actual_path = original_download_path if original_download_path and file_exists else download_path
10021004
rel_path = os.path.relpath(actual_path, directory) if manifest else ""
1005+
asset_res = version_to_resource(download_size)
10031006

10041007
dedup_download = False
10051008
if file_exists:
10061009
if manifest is not None:
10071010
# Identity-based matching: check manifest for this asset
1008-
manifest_row = manifest.lookup(photo.id, manifest.zone_id, rel_path)
1011+
manifest_row = manifest.lookup(photo.id, manifest.zone_id, asset_res)
10091012
if manifest_row is not None:
10101013
# Check if this asset actually owns the file
10111014
rel_path, download_path, file_exists, dedup_suffix = _check_collision(
10121015
manifest, photo.id, rel_path, download_path, directory, dir_cache, logger,
1016+
asset_resource=asset_res,
10131017
)
10141018
dedup_download = dedup_suffix is not None and not file_exists
10151019
if dedup_suffix is None and manifest_row.version_size != version.size:
@@ -1039,6 +1043,7 @@ def download_builder(
10391043
zone_id=manifest.zone_id,
10401044
local_path=rel_path,
10411045
version_size=version.size,
1046+
asset_resource=asset_res,
10421047
**meta,
10431048
)
10441049
else:
@@ -1059,6 +1064,7 @@ def download_builder(
10591064
zone_id=manifest.zone_id,
10601065
local_path=rel_path,
10611066
version_size=version.size,
1067+
asset_resource=asset_res,
10621068
**meta,
10631069
)
10641070
logger.debug(
@@ -1075,6 +1081,7 @@ def download_builder(
10751081
zone_id=manifest.zone_id,
10761082
local_path=rel_path,
10771083
version_size=version.size,
1084+
asset_resource=asset_res,
10781085
**meta,
10791086
)
10801087
logger.debug(
@@ -1151,6 +1158,7 @@ def download_builder(
11511158
zone_id=manifest.zone_id,
11521159
local_path=rel_path,
11531160
version_size=version.size,
1161+
asset_resource=asset_res,
11541162
**meta,
11551163
)
11561164
logger.info("Downloaded %s", truncated_path)
@@ -1170,6 +1178,7 @@ def download_builder(
11701178
# Also download the live photo if present
11711179
if not skip_live_photos:
11721180
lp_size = live_photo_size
1181+
lp_res = version_to_resource(lp_size)
11731182
photo_versions_with_policy = photo.versions_with_raw_policy(raw_policy)
11741183
if lp_size in photo_versions_with_policy:
11751184
version = photo_versions_with_policy[lp_size]
@@ -1200,7 +1209,7 @@ def download_builder(
12001209
if only_print_filenames:
12011210
if lp_file_exists and manifest is not None:
12021211
# Check if version changed — need to print the filename for re-download
1203-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1212+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
12041213
if lp_manifest_row is not None and lp_manifest_row.version_size != version.size:
12051214
lp_file_exists = False
12061215
if not lp_file_exists:
@@ -1219,11 +1228,12 @@ def download_builder(
12191228
else:
12201229
if lp_file_exists:
12211230
if manifest is not None:
1222-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1231+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
12231232
if lp_manifest_row is not None:
12241233
# Check if this asset owns the LP file
12251234
lp_rel_path, lp_download_path, lp_file_exists, lp_dedup = _check_collision(
12261235
manifest, photo.id, lp_rel_path, lp_download_path, directory, dir_cache, logger,
1236+
asset_resource=lp_res,
12271237
)
12281238
if lp_dedup is not None:
12291239
# Collision handled — propagate suffix
@@ -1245,6 +1255,7 @@ def download_builder(
12451255
zone_id=manifest.zone_id,
12461256
local_path=lp_rel_path,
12471257
version_size=version.size,
1258+
asset_resource=lp_res,
12481259
**meta,
12491260
)
12501261
else:
@@ -1263,6 +1274,7 @@ def download_builder(
12631274
zone_id=manifest.zone_id,
12641275
local_path=lp_rel_path,
12651276
version_size=version.size,
1277+
asset_resource=lp_res,
12661278
**meta,
12671279
)
12681280
logger.debug(
@@ -1277,6 +1289,7 @@ def download_builder(
12771289
zone_id=manifest.zone_id,
12781290
local_path=lp_rel_path,
12791291
version_size=version.size,
1292+
asset_resource=lp_res,
12801293
**meta,
12811294
)
12821295
elif file_match_policy == FileMatchPolicy.NAME_SIZE_DEDUP_WITH_SUFFIX:
@@ -1315,6 +1328,7 @@ def download_builder(
13151328
zone_id=manifest.zone_id,
13161329
local_path=lp_rel_path,
13171330
version_size=version.size,
1331+
asset_resource=lp_res,
13181332
**meta,
13191333
)
13201334
logger.info("Downloaded %s", truncated_path)

src/icloudpd/manifest.py

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
The manifest lives at {download_dir}/.icloudpd.db and travels with the library.
1111
"""
1212

13+
import contextlib
1314
import logging
1415
import os
1516
import sqlite3
@@ -25,6 +26,7 @@
2526
CREATE TABLE IF NOT EXISTS manifest (
2627
asset_id TEXT NOT NULL,
2728
zone_id TEXT NOT NULL DEFAULT '',
29+
asset_resource TEXT NOT NULL DEFAULT 'resOriginal',
2830
local_path TEXT NOT NULL,
2931
version_size INTEGER NOT NULL,
3032
version_checksum TEXT,
@@ -88,6 +90,7 @@ class ManifestRow:
8890

8991
asset_id: str
9092
zone_id: str
93+
asset_resource: str
9194
local_path: str
9295
version_size: int
9396
version_checksum: str | None
@@ -125,7 +128,7 @@ class ManifestRow:
125128

126129

127130
_ALL_COLUMNS = (
128-
"asset_id, zone_id, local_path, version_size, version_checksum, "
131+
"asset_id, zone_id, asset_resource, local_path, version_size, version_checksum, "
129132
"change_tag, downloaded_at, last_updated_at, item_type, filename, "
130133
"asset_date, added_date, is_favorite, is_hidden, is_deleted, "
131134
"original_width, original_height, duration, orientation, "
@@ -224,6 +227,7 @@ def _migrate_from_v0(self) -> None:
224227
self._conn.execute(f"ALTER TABLE manifest ADD COLUMN {col_name} {col_def}") # type: ignore[union-attr]
225228
logger.info("Migrated manifest DB from v0 to v%d (%d columns added)",
226229
SCHEMA_VERSION, sum(1 for c, _ in new_columns if c not in existing))
230+
self._rebuild_pk()
227231
self._conn.execute( # type: ignore[union-attr]
228232
"CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path)"
229233
)
@@ -232,10 +236,28 @@ def _run_migrations(self, from_version: int) -> None:
232236
"""Run incremental migrations from from_version to SCHEMA_VERSION."""
233237
for version, sql in _MIGRATIONS:
234238
if version > from_version:
235-
self._conn.execute(sql) # type: ignore[union-attr]
239+
with contextlib.suppress(sqlite3.OperationalError):
240+
self._conn.execute(sql) # type: ignore[union-attr]
241+
if from_version < 3:
242+
self._rebuild_pk()
236243
self._conn.execute(f"PRAGMA user_version={SCHEMA_VERSION}") # type: ignore[union-attr]
237244
self._conn.commit() # type: ignore[union-attr]
238245

246+
def _rebuild_pk(self) -> None:
247+
"""Rebuild the manifest table with the correct PK (asset_id, zone_id, asset_resource)."""
248+
conn = self._conn
249+
assert conn is not None
250+
conn.execute("ALTER TABLE manifest RENAME TO manifest_old")
251+
conn.executescript(_FRESH_SCHEMA)
252+
# Copy data, keeping only one row per (asset_id, zone_id, asset_resource)
253+
cols = _ALL_COLUMNS
254+
conn.execute(
255+
f"INSERT OR IGNORE INTO manifest ({cols}) "
256+
f"SELECT {cols} FROM manifest_old"
257+
)
258+
conn.execute("DROP TABLE manifest_old")
259+
conn.commit()
260+
239261
def close(self) -> None:
240262
"""Close the manifest DB, committing any pending writes."""
241263
if self._conn:
@@ -272,12 +294,12 @@ def __enter__(self) -> "ManifestDB":
272294
def __exit__(self, *_: object) -> None:
273295
self.close()
274296

275-
def lookup(self, asset_id: str, zone_id: str, local_path: str) -> ManifestRow | None:
297+
def lookup(self, asset_id: str, zone_id: str, asset_resource: str) -> ManifestRow | None:
276298
"""Look up a manifest entry by identity."""
277299
row = self._db.execute(
278300
f"SELECT {_ALL_COLUMNS} FROM manifest "
279-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
280-
(asset_id, zone_id, local_path),
301+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
302+
(asset_id, zone_id, asset_resource),
281303
).fetchone()
282304
if row is None:
283305
return None
@@ -300,6 +322,7 @@ def upsert(
300322
zone_id: str,
301323
local_path: str,
302324
version_size: int,
325+
asset_resource: str = "resOriginal",
303326
version_checksum: str | None = None,
304327
change_tag: str | None = None,
305328
item_type: str | None = None,
@@ -333,7 +356,7 @@ def upsert(
333356
"""Insert or update a manifest entry. Auto-flushes every 500 writes."""
334357
now = datetime.now(tz=timezone.utc).isoformat()
335358
params = (
336-
asset_id, zone_id, local_path, version_size, version_checksum,
359+
asset_id, zone_id, asset_resource, local_path, version_size, version_checksum,
337360
change_tag, now, now, item_type, filename,
338361
asset_date, added_date, is_favorite, is_hidden, is_deleted,
339362
original_width, original_height, duration, orientation,
@@ -402,21 +425,21 @@ def upsert(
402425
logger.warning("Manifest write failed for %s: %s", local_path, e)
403426
return
404427

405-
def update_path(self, asset_id: str, zone_id: str, old_path: str, new_path: str) -> None:
428+
def update_path(self, asset_id: str, zone_id: str, asset_resource: str, new_path: str) -> None:
406429
"""Update local_path for an existing manifest entry."""
407430
try:
408431
self._db.execute(
409432
"UPDATE manifest SET local_path = ?, last_updated_at = ? "
410-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
433+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
411434
(new_path, datetime.now(tz=timezone.utc).isoformat(),
412-
asset_id, zone_id, old_path),
435+
asset_id, zone_id, asset_resource),
413436
)
414437
self._dirty = True
415438
self._pending_count += 1
416439
except sqlite3.Error as e:
417440
logger.warning(
418441
"Manifest path update failed for %s -> %s: %s",
419-
old_path, new_path, e,
442+
asset_resource, new_path, e,
420443
)
421444

422445
def update_file_mtime(
@@ -456,11 +479,11 @@ def count_by_path(self, local_path: str) -> int:
456479
).fetchone()
457480
return row[0] if row else 0
458481

459-
def remove(self, asset_id: str, zone_id: str, local_path: str) -> None:
482+
def remove(self, asset_id: str, zone_id: str, asset_resource: str) -> None:
460483
"""Remove a manifest entry."""
461484
self._db.execute(
462-
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
463-
(asset_id, zone_id, local_path),
485+
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
486+
(asset_id, zone_id, asset_resource),
464487
)
465488
self._dirty = True
466489

src/icloudpd/metadata_writer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,7 @@ def extract_metadata_update(
210210
gps_latitude = xmp_metadata.GPSLatitude if xmp_metadata.GPSLatitude is not None else None
211211
gps_longitude = xmp_metadata.GPSLongitude if xmp_metadata.GPSLongitude is not None else None
212212
gps_altitude = xmp_metadata.GPSAltitude if xmp_metadata.GPSAltitude is not None else None
213-
# horzAcc is not in XMPMetadata — extract from raw locationEnc
214-
gps_h_accuracy = _extract_h_accuracy(asset_record)
213+
gps_h_accuracy = xmp_metadata.GPSHPositioningError
215214
# Extract created_date for datetime/dates categories
216215
created_date_val = None
217216
if xmp_metadata.CreateDate:

src/pyicloud_ipd/version_size.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,22 @@ def __str__(self) -> str:
2222

2323

2424
VersionSize = AssetVersionSize | LivePhotoVersionSize
25+
26+
27+
# Maps VersionSize enums to Apple's iCloud resource field prefixes.
28+
# These prefixes are stable identifiers from the CPLMaster record.
29+
_VERSION_TO_RESOURCE: dict[VersionSize, str] = {
30+
AssetVersionSize.ORIGINAL: "resOriginal",
31+
AssetVersionSize.ALTERNATIVE: "resOriginalAlt",
32+
AssetVersionSize.ADJUSTED: "resJPEGFull",
33+
AssetVersionSize.MEDIUM: "resJPEGMed",
34+
AssetVersionSize.THUMB: "resJPEGThumb",
35+
LivePhotoVersionSize.ORIGINAL: "resOriginalVidCompl",
36+
LivePhotoVersionSize.MEDIUM: "resVidMed",
37+
LivePhotoVersionSize.THUMB: "resVidSmall",
38+
}
39+
40+
41+
def version_to_resource(version_size: VersionSize) -> str:
42+
"""Map a VersionSize enum to its Apple resource field prefix."""
43+
return _VERSION_TO_RESOURCE[version_size]

0 commit comments

Comments
 (0)