Skip to content

Commit 6125820

Browse files
teh-hippoCopilot
andcommitted
feat: change manifest PK from local_path to asset_resource
Replace PK (asset_id, zone_id, local_path) with (asset_id, zone_id, asset_resource) where asset_resource is the Apple iCloud resource field prefix (e.g. resOriginal, resOriginalVidCompl). This makes local_path an updateable column, so config changes like toggling --keep-unicode-in-filenames update the existing manifest row instead of creating a duplicate. - Add asset_resource column (schema v3) - Add version_to_resource() mapping in version_size.py - Update all manifest.lookup/upsert/remove calls in base.py - Migration: v0/v1/v2 DBs get asset_resource via ALTER + table rebuild - 7 new unit tests for asset_resource PK behaviour Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent e740c2b commit 6125820

File tree

6 files changed

+468
-58
lines changed

6 files changed

+468
-58
lines changed

src/icloudpd/base.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
size_to_suffix,
9393
store_password_in_keyring,
9494
)
95-
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize
95+
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize, version_to_resource
9696

9797
freeze_support() # fmt: skip # fixing tqdm on macos
9898

@@ -128,6 +128,7 @@ def _check_collision(
128128
directory: str,
129129
dir_cache: "DirCache",
130130
logger: Logger,
131+
asset_resource: str = "resOriginal",
131132
) -> tuple[str, str, bool, str | None]:
132133
"""Check if another asset owns this path and self-heal if needed.
133134
@@ -140,7 +141,7 @@ def _check_collision(
140141
dedup_suffix = f"_{suffix}"
141142
new_download_path = add_suffix_to_filename(dedup_suffix, download_path)
142143
new_rel_path = os.path.relpath(new_download_path, directory)
143-
manifest.update_path(photo_id, manifest.zone_id, rel_path, new_rel_path)
144+
manifest.update_path(photo_id, manifest.zone_id, asset_resource, new_rel_path)
144145
file_exists = dir_cache.isfile(new_download_path)
145146
logger.debug(
146147
"Collision on %s (owned by %s), deduping %s to %s",
@@ -912,16 +913,18 @@ def download_builder(
912913
# Compute relative path for manifest — use the actual file path on disk
913914
actual_path = original_download_path if original_download_path and file_exists else download_path
914915
rel_path = os.path.relpath(actual_path, directory) if manifest else ""
916+
asset_res = version_to_resource(download_size)
915917

916918
dedup_download = False
917919
if file_exists:
918920
if manifest is not None:
919921
# Identity-based matching: check manifest for this asset
920-
manifest_row = manifest.lookup(photo.id, manifest.zone_id, rel_path)
922+
manifest_row = manifest.lookup(photo.id, manifest.zone_id, asset_res)
921923
if manifest_row is not None:
922924
# Check if this asset actually owns the file
923925
rel_path, download_path, file_exists, dedup_suffix = _check_collision(
924926
manifest, photo.id, rel_path, download_path, directory, dir_cache, logger,
927+
asset_resource=asset_res,
925928
)
926929
dedup_download = dedup_suffix is not None and not file_exists
927930
if dedup_suffix is None and manifest_row.version_size != version.size:
@@ -951,6 +954,7 @@ def download_builder(
951954
zone_id=manifest.zone_id,
952955
local_path=rel_path,
953956
version_size=version.size,
957+
asset_resource=asset_res,
954958
**meta,
955959
)
956960
else:
@@ -971,6 +975,7 @@ def download_builder(
971975
zone_id=manifest.zone_id,
972976
local_path=rel_path,
973977
version_size=version.size,
978+
asset_resource=asset_res,
974979
**meta,
975980
)
976981
logger.debug(
@@ -987,6 +992,7 @@ def download_builder(
987992
zone_id=manifest.zone_id,
988993
local_path=rel_path,
989994
version_size=version.size,
995+
asset_resource=asset_res,
990996
**meta,
991997
)
992998
logger.debug(
@@ -1063,6 +1069,7 @@ def download_builder(
10631069
zone_id=manifest.zone_id,
10641070
local_path=rel_path,
10651071
version_size=version.size,
1072+
asset_resource=asset_res,
10661073
**meta,
10671074
)
10681075
logger.info("Downloaded %s", truncated_path)
@@ -1081,6 +1088,7 @@ def download_builder(
10811088
# Also download the live photo if present
10821089
if not skip_live_photos:
10831090
lp_size = live_photo_size
1091+
lp_res = version_to_resource(lp_size)
10841092
photo_versions_with_policy = photo.versions_with_raw_policy(raw_policy)
10851093
if lp_size in photo_versions_with_policy:
10861094
version = photo_versions_with_policy[lp_size]
@@ -1111,7 +1119,7 @@ def download_builder(
11111119
if only_print_filenames:
11121120
if lp_file_exists and manifest is not None:
11131121
# Check if version changed — need to print the filename for re-download
1114-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1122+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11151123
if lp_manifest_row is not None and lp_manifest_row.version_size != version.size:
11161124
lp_file_exists = False
11171125
if not lp_file_exists:
@@ -1130,11 +1138,12 @@ def download_builder(
11301138
else:
11311139
if lp_file_exists:
11321140
if manifest is not None:
1133-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1141+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11341142
if lp_manifest_row is not None:
11351143
# Check if this asset owns the LP file
11361144
lp_rel_path, lp_download_path, lp_file_exists, lp_dedup = _check_collision(
11371145
manifest, photo.id, lp_rel_path, lp_download_path, directory, dir_cache, logger,
1146+
asset_resource=lp_res,
11381147
)
11391148
if lp_dedup is not None:
11401149
# Collision handled — propagate suffix
@@ -1156,6 +1165,7 @@ def download_builder(
11561165
zone_id=manifest.zone_id,
11571166
local_path=lp_rel_path,
11581167
version_size=version.size,
1168+
asset_resource=lp_res,
11591169
**meta,
11601170
)
11611171
else:
@@ -1174,6 +1184,7 @@ def download_builder(
11741184
zone_id=manifest.zone_id,
11751185
local_path=lp_rel_path,
11761186
version_size=version.size,
1187+
asset_resource=lp_res,
11771188
**meta,
11781189
)
11791190
logger.debug(
@@ -1188,6 +1199,7 @@ def download_builder(
11881199
zone_id=manifest.zone_id,
11891200
local_path=lp_rel_path,
11901201
version_size=version.size,
1202+
asset_resource=lp_res,
11911203
**meta,
11921204
)
11931205
elif file_match_policy == FileMatchPolicy.NAME_SIZE_DEDUP_WITH_SUFFIX:
@@ -1226,6 +1238,7 @@ def download_builder(
12261238
zone_id=manifest.zone_id,
12271239
local_path=lp_rel_path,
12281240
version_size=version.size,
1241+
asset_resource=lp_res,
12291242
**meta,
12301243
)
12311244
logger.info("Downloaded %s", truncated_path)

src/icloudpd/manifest.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
The manifest lives at {download_dir}/.icloudpd.db and travels with the library.
1111
"""
1212

13+
import contextlib
1314
import logging
1415
import os
1516
import sqlite3
@@ -19,12 +20,13 @@
1920

2021
logger = logging.getLogger(__name__)
2122

22-
SCHEMA_VERSION = 2
23+
SCHEMA_VERSION = 3
2324

2425
_FRESH_SCHEMA = """\
2526
CREATE TABLE IF NOT EXISTS manifest (
2627
asset_id TEXT NOT NULL,
2728
zone_id TEXT NOT NULL DEFAULT '',
29+
asset_resource TEXT NOT NULL DEFAULT 'resOriginal',
2830
local_path TEXT NOT NULL,
2931
version_size INTEGER NOT NULL,
3032
version_checksum TEXT,
@@ -58,7 +60,7 @@
5860
burst_id TEXT,
5961
original_orientation INTEGER,
6062
raw_fields TEXT,
61-
PRIMARY KEY (asset_id, zone_id, local_path)
63+
PRIMARY KEY (asset_id, zone_id, asset_resource)
6264
);
6365
CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path);
6466
"""
@@ -76,6 +78,7 @@
7678
(2, "ALTER TABLE manifest ADD COLUMN burst_id TEXT"),
7779
(2, "ALTER TABLE manifest ADD COLUMN original_orientation INTEGER"),
7880
(2, "ALTER TABLE manifest ADD COLUMN raw_fields TEXT"),
81+
(3, "ALTER TABLE manifest ADD COLUMN asset_resource TEXT NOT NULL DEFAULT 'resOriginal'"),
7982
]
8083

8184

@@ -85,6 +88,7 @@ class ManifestRow:
8588

8689
asset_id: str
8790
zone_id: str
91+
asset_resource: str
8892
local_path: str
8993
version_size: int
9094
version_checksum: str | None
@@ -121,7 +125,7 @@ class ManifestRow:
121125

122126

123127
_ALL_COLUMNS = (
124-
"asset_id, zone_id, local_path, version_size, version_checksum, "
128+
"asset_id, zone_id, asset_resource, local_path, version_size, version_checksum, "
125129
"change_tag, downloaded_at, last_updated_at, item_type, filename, "
126130
"asset_date, added_date, is_favorite, is_hidden, is_deleted, "
127131
"original_width, original_height, duration, orientation, "
@@ -211,12 +215,14 @@ def _migrate_from_v0(self) -> None:
211215
("burst_id", "TEXT"),
212216
("original_orientation", "INTEGER"),
213217
("raw_fields", "TEXT"),
218+
("asset_resource", "TEXT NOT NULL DEFAULT 'resOriginal'"),
214219
]
215220
for col_name, col_def in new_columns:
216221
if col_name not in existing:
217222
self._conn.execute(f"ALTER TABLE manifest ADD COLUMN {col_name} {col_def}") # type: ignore[union-attr]
218223
logger.info("Migrated manifest DB from v0 to v%d (%d columns added)",
219224
SCHEMA_VERSION, sum(1 for c, _ in new_columns if c not in existing))
225+
self._rebuild_pk()
220226
self._conn.execute( # type: ignore[union-attr]
221227
"CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path)"
222228
)
@@ -225,10 +231,28 @@ def _run_migrations(self, from_version: int) -> None:
225231
"""Run incremental migrations from from_version to SCHEMA_VERSION."""
226232
for version, sql in _MIGRATIONS:
227233
if version > from_version:
228-
self._conn.execute(sql) # type: ignore[union-attr]
234+
with contextlib.suppress(sqlite3.OperationalError):
235+
self._conn.execute(sql) # type: ignore[union-attr]
236+
if from_version < 3:
237+
self._rebuild_pk()
229238
self._conn.execute(f"PRAGMA user_version={SCHEMA_VERSION}") # type: ignore[union-attr]
230239
self._conn.commit() # type: ignore[union-attr]
231240

241+
def _rebuild_pk(self) -> None:
242+
"""Rebuild the manifest table with the correct PK (asset_id, zone_id, asset_resource)."""
243+
conn = self._conn
244+
assert conn is not None
245+
conn.execute("ALTER TABLE manifest RENAME TO manifest_old")
246+
conn.executescript(_FRESH_SCHEMA)
247+
# Copy data, keeping only one row per (asset_id, zone_id, asset_resource)
248+
cols = _ALL_COLUMNS
249+
conn.execute(
250+
f"INSERT OR IGNORE INTO manifest ({cols}) "
251+
f"SELECT {cols} FROM manifest_old"
252+
)
253+
conn.execute("DROP TABLE manifest_old")
254+
conn.commit()
255+
232256
def close(self) -> None:
233257
"""Close the manifest DB, committing any pending writes."""
234258
if self._conn:
@@ -265,12 +289,12 @@ def __enter__(self) -> "ManifestDB":
265289
def __exit__(self, *_: object) -> None:
266290
self.close()
267291

268-
def lookup(self, asset_id: str, zone_id: str, local_path: str) -> ManifestRow | None:
292+
def lookup(self, asset_id: str, zone_id: str, asset_resource: str) -> ManifestRow | None:
269293
"""Look up a manifest entry by identity."""
270294
row = self._db.execute(
271295
f"SELECT {_ALL_COLUMNS} FROM manifest "
272-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
273-
(asset_id, zone_id, local_path),
296+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
297+
(asset_id, zone_id, asset_resource),
274298
).fetchone()
275299
if row is None:
276300
return None
@@ -293,6 +317,7 @@ def upsert(
293317
zone_id: str,
294318
local_path: str,
295319
version_size: int,
320+
asset_resource: str = "resOriginal",
296321
version_checksum: str | None = None,
297322
change_tag: str | None = None,
298323
item_type: str | None = None,
@@ -326,7 +351,7 @@ def upsert(
326351
"""Insert or update a manifest entry. Auto-flushes every 500 writes."""
327352
now = datetime.now(tz=timezone.utc).isoformat()
328353
params = (
329-
asset_id, zone_id, local_path, version_size, version_checksum,
354+
asset_id, zone_id, asset_resource, local_path, version_size, version_checksum,
330355
change_tag, now, now, item_type, filename,
331356
asset_date, added_date, is_favorite, is_hidden, is_deleted,
332357
original_width, original_height, duration, orientation,
@@ -336,8 +361,9 @@ def upsert(
336361
)
337362
sql = (
338363
f"INSERT INTO manifest ({_ALL_COLUMNS}) "
339-
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
340-
"ON CONFLICT(asset_id, zone_id, local_path) DO UPDATE SET "
364+
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
365+
"ON CONFLICT(asset_id, zone_id, asset_resource) DO UPDATE SET "
366+
"local_path=excluded.local_path, "
341367
"version_size=excluded.version_size, "
342368
"version_checksum=excluded.version_checksum, "
343369
"change_tag=excluded.change_tag, "
@@ -392,21 +418,21 @@ def upsert(
392418
logger.warning("Manifest write failed for %s: %s", local_path, e)
393419
return
394420

395-
def update_path(self, asset_id: str, zone_id: str, old_path: str, new_path: str) -> None:
421+
def update_path(self, asset_id: str, zone_id: str, asset_resource: str, new_path: str) -> None:
396422
"""Update local_path for an existing manifest entry."""
397423
try:
398424
self._db.execute(
399425
"UPDATE manifest SET local_path = ?, last_updated_at = ? "
400-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
426+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
401427
(new_path, datetime.now(tz=timezone.utc).isoformat(),
402-
asset_id, zone_id, old_path),
428+
asset_id, zone_id, asset_resource),
403429
)
404430
self._dirty = True
405431
self._pending_count += 1
406432
except sqlite3.Error as e:
407433
logger.warning(
408434
"Manifest path update failed for %s -> %s: %s",
409-
old_path, new_path, e,
435+
asset_resource, new_path, e,
410436
)
411437

412438
def count_by_path(self, local_path: str) -> int:
@@ -417,11 +443,11 @@ def count_by_path(self, local_path: str) -> int:
417443
).fetchone()
418444
return row[0] if row else 0
419445

420-
def remove(self, asset_id: str, zone_id: str, local_path: str) -> None:
446+
def remove(self, asset_id: str, zone_id: str, asset_resource: str) -> None:
421447
"""Remove a manifest entry."""
422448
self._db.execute(
423-
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
424-
(asset_id, zone_id, local_path),
449+
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
450+
(asset_id, zone_id, asset_resource),
425451
)
426452
self._dirty = True
427453

src/icloudpd/metadata_writer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,7 @@ def extract_metadata_update(
210210
gps_latitude = xmp_metadata.GPSLatitude if xmp_metadata.GPSLatitude is not None else None
211211
gps_longitude = xmp_metadata.GPSLongitude if xmp_metadata.GPSLongitude is not None else None
212212
gps_altitude = xmp_metadata.GPSAltitude if xmp_metadata.GPSAltitude is not None else None
213-
# horzAcc is not in XMPMetadata — extract from raw locationEnc
214-
gps_h_accuracy = _extract_h_accuracy(asset_record)
213+
gps_h_accuracy = xmp_metadata.GPSHPositioningError
215214
# Extract created_date for datetime/dates categories
216215
created_date_val = None
217216
if xmp_metadata.CreateDate:

src/pyicloud_ipd/version_size.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,22 @@ def __str__(self) -> str:
2222

2323

2424
VersionSize = AssetVersionSize | LivePhotoVersionSize
25+
26+
27+
# Maps VersionSize enums to Apple's iCloud resource field prefixes.
28+
# These prefixes are stable identifiers from the CPLMaster record.
29+
_VERSION_TO_RESOURCE: dict[VersionSize, str] = {
30+
AssetVersionSize.ORIGINAL: "resOriginal",
31+
AssetVersionSize.ALTERNATIVE: "resOriginalAlt",
32+
AssetVersionSize.ADJUSTED: "resJPEGFull",
33+
AssetVersionSize.MEDIUM: "resJPEGMed",
34+
AssetVersionSize.THUMB: "resJPEGThumb",
35+
LivePhotoVersionSize.ORIGINAL: "resOriginalVidCompl",
36+
LivePhotoVersionSize.MEDIUM: "resVidMed",
37+
LivePhotoVersionSize.THUMB: "resVidSmall",
38+
}
39+
40+
41+
def version_to_resource(version_size: VersionSize) -> str:
42+
"""Map a VersionSize enum to its Apple resource field prefix."""
43+
return _VERSION_TO_RESOURCE[version_size]

0 commit comments

Comments
 (0)