Skip to content

Commit 212ac1a

Browse files
teh-hippoCopilot
andcommitted
feat: change manifest PK from local_path to asset_resource
Replace PK (asset_id, zone_id, local_path) with (asset_id, zone_id, asset_resource) where asset_resource is the Apple iCloud resource field prefix (e.g. resOriginal, resOriginalVidCompl). This makes local_path an updateable column, so config changes like toggling --keep-unicode-in-filenames update the existing manifest row instead of creating a duplicate. - Add asset_resource column (schema v3) - Add version_to_resource() mapping in version_size.py - Update all manifest.lookup/upsert/remove calls in base.py - Migration: v0/v1/v2 DBs get asset_resource via ALTER + table rebuild - 7 new unit tests for asset_resource PK behaviour Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent dac97f8 commit 212ac1a

File tree

4 files changed

+229
-47
lines changed

4 files changed

+229
-47
lines changed

src/icloudpd/base.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
size_to_suffix,
9393
store_password_in_keyring,
9494
)
95-
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize
95+
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize, version_to_resource
9696

9797
freeze_support() # fmt: skip # fixing tqdm on macos
9898

@@ -128,6 +128,7 @@ def _check_collision(
128128
directory: str,
129129
dir_cache: "DirCache",
130130
logger: Logger,
131+
asset_resource: str = "resOriginal",
131132
) -> tuple[str, str, bool, str | None]:
132133
"""Check if another asset owns this path and self-heal if needed.
133134
@@ -140,7 +141,7 @@ def _check_collision(
140141
dedup_suffix = f"_{suffix}"
141142
new_download_path = add_suffix_to_filename(dedup_suffix, download_path)
142143
new_rel_path = os.path.relpath(new_download_path, directory)
143-
manifest.update_path(photo_id, manifest.zone_id, rel_path, new_rel_path)
144+
manifest.update_path(photo_id, manifest.zone_id, asset_resource, new_rel_path)
144145
file_exists = dir_cache.isfile(new_download_path)
145146
logger.debug(
146147
"Collision on %s (owned by %s), deduping %s to %s",
@@ -912,16 +913,18 @@ def download_builder(
912913
# Compute relative path for manifest — use the actual file path on disk
913914
actual_path = original_download_path if original_download_path and file_exists else download_path
914915
rel_path = os.path.relpath(actual_path, directory) if manifest else ""
916+
asset_res = version_to_resource(download_size)
915917

916918
dedup_download = False
917919
if file_exists:
918920
if manifest is not None:
919921
# Identity-based matching: check manifest for this asset
920-
manifest_row = manifest.lookup(photo.id, manifest.zone_id, rel_path)
922+
manifest_row = manifest.lookup(photo.id, manifest.zone_id, asset_res)
921923
if manifest_row is not None:
922924
# Check if this asset actually owns the file
923925
rel_path, download_path, file_exists, dedup_suffix = _check_collision(
924926
manifest, photo.id, rel_path, download_path, directory, dir_cache, logger,
927+
asset_resource=asset_res,
925928
)
926929
dedup_download = dedup_suffix is not None and not file_exists
927930
if dedup_suffix is None and manifest_row.version_size != version.size:
@@ -951,6 +954,7 @@ def download_builder(
951954
zone_id=manifest.zone_id,
952955
local_path=rel_path,
953956
version_size=version.size,
957+
asset_resource=asset_res,
954958
**meta,
955959
)
956960
else:
@@ -971,6 +975,7 @@ def download_builder(
971975
zone_id=manifest.zone_id,
972976
local_path=rel_path,
973977
version_size=version.size,
978+
asset_resource=asset_res,
974979
**meta,
975980
)
976981
logger.debug(
@@ -987,6 +992,7 @@ def download_builder(
987992
zone_id=manifest.zone_id,
988993
local_path=rel_path,
989994
version_size=version.size,
995+
asset_resource=asset_res,
990996
**meta,
991997
)
992998
logger.debug(
@@ -1063,6 +1069,7 @@ def download_builder(
10631069
zone_id=manifest.zone_id,
10641070
local_path=rel_path,
10651071
version_size=version.size,
1072+
asset_resource=asset_res,
10661073
**meta,
10671074
)
10681075
logger.info("Downloaded %s", truncated_path)
@@ -1078,6 +1085,7 @@ def download_builder(
10781085
# Also download the live photo if present
10791086
if not skip_live_photos:
10801087
lp_size = live_photo_size
1088+
lp_res = version_to_resource(lp_size)
10811089
photo_versions_with_policy = photo.versions_with_raw_policy(raw_policy)
10821090
if lp_size in photo_versions_with_policy:
10831091
version = photo_versions_with_policy[lp_size]
@@ -1108,7 +1116,7 @@ def download_builder(
11081116
if only_print_filenames:
11091117
if lp_file_exists and manifest is not None:
11101118
# Check if version changed — need to print the filename for re-download
1111-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1119+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11121120
if lp_manifest_row is not None and lp_manifest_row.version_size != version.size:
11131121
lp_file_exists = False
11141122
if not lp_file_exists:
@@ -1127,11 +1135,12 @@ def download_builder(
11271135
else:
11281136
if lp_file_exists:
11291137
if manifest is not None:
1130-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1138+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11311139
if lp_manifest_row is not None:
11321140
# Check if this asset owns the LP file
11331141
lp_rel_path, lp_download_path, lp_file_exists, lp_dedup = _check_collision(
11341142
manifest, photo.id, lp_rel_path, lp_download_path, directory, dir_cache, logger,
1143+
asset_resource=lp_res,
11351144
)
11361145
if lp_dedup is not None:
11371146
# Collision handled — propagate suffix
@@ -1153,6 +1162,7 @@ def download_builder(
11531162
zone_id=manifest.zone_id,
11541163
local_path=lp_rel_path,
11551164
version_size=version.size,
1165+
asset_resource=lp_res,
11561166
**meta,
11571167
)
11581168
else:
@@ -1171,6 +1181,7 @@ def download_builder(
11711181
zone_id=manifest.zone_id,
11721182
local_path=lp_rel_path,
11731183
version_size=version.size,
1184+
asset_resource=lp_res,
11741185
**meta,
11751186
)
11761187
logger.debug(
@@ -1185,6 +1196,7 @@ def download_builder(
11851196
zone_id=manifest.zone_id,
11861197
local_path=lp_rel_path,
11871198
version_size=version.size,
1199+
asset_resource=lp_res,
11881200
**meta,
11891201
)
11901202
elif file_match_policy == FileMatchPolicy.NAME_SIZE_DEDUP_WITH_SUFFIX:
@@ -1223,6 +1235,7 @@ def download_builder(
12231235
zone_id=manifest.zone_id,
12241236
local_path=lp_rel_path,
12251237
version_size=version.size,
1238+
asset_resource=lp_res,
12261239
**meta,
12271240
)
12281241
logger.info("Downloaded %s", truncated_path)

src/icloudpd/manifest.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
The manifest lives at {download_dir}/.icloudpd.db and travels with the library.
1111
"""
1212

13+
import contextlib
1314
import logging
1415
import os
1516
import sqlite3
@@ -19,12 +20,13 @@
1920

2021
logger = logging.getLogger(__name__)
2122

22-
SCHEMA_VERSION = 2
23+
SCHEMA_VERSION = 3
2324

2425
_FRESH_SCHEMA = """\
2526
CREATE TABLE IF NOT EXISTS manifest (
2627
asset_id TEXT NOT NULL,
2728
zone_id TEXT NOT NULL DEFAULT '',
29+
asset_resource TEXT NOT NULL DEFAULT 'resOriginal',
2830
local_path TEXT NOT NULL,
2931
version_size INTEGER NOT NULL,
3032
version_checksum TEXT,
@@ -58,7 +60,7 @@
5860
burst_id TEXT,
5961
original_orientation INTEGER,
6062
raw_fields TEXT,
61-
PRIMARY KEY (asset_id, zone_id, local_path)
63+
PRIMARY KEY (asset_id, zone_id, asset_resource)
6264
);
6365
CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path);
6466
"""
@@ -76,6 +78,7 @@
7678
(2, "ALTER TABLE manifest ADD COLUMN burst_id TEXT"),
7779
(2, "ALTER TABLE manifest ADD COLUMN original_orientation INTEGER"),
7880
(2, "ALTER TABLE manifest ADD COLUMN raw_fields TEXT"),
81+
(3, "ALTER TABLE manifest ADD COLUMN asset_resource TEXT NOT NULL DEFAULT 'resOriginal'"),
7982
]
8083

8184

@@ -85,6 +88,7 @@ class ManifestRow:
8588

8689
asset_id: str
8790
zone_id: str
91+
asset_resource: str
8892
local_path: str
8993
version_size: int
9094
version_checksum: str | None
@@ -121,7 +125,7 @@ class ManifestRow:
121125

122126

123127
_ALL_COLUMNS = (
124-
"asset_id, zone_id, local_path, version_size, version_checksum, "
128+
"asset_id, zone_id, asset_resource, local_path, version_size, version_checksum, "
125129
"change_tag, downloaded_at, last_updated_at, item_type, filename, "
126130
"asset_date, added_date, is_favorite, is_hidden, is_deleted, "
127131
"original_width, original_height, duration, orientation, "
@@ -211,12 +215,14 @@ def _migrate_from_v0(self) -> None:
211215
("burst_id", "TEXT"),
212216
("original_orientation", "INTEGER"),
213217
("raw_fields", "TEXT"),
218+
("asset_resource", "TEXT NOT NULL DEFAULT 'resOriginal'"),
214219
]
215220
for col_name, col_def in new_columns:
216221
if col_name not in existing:
217222
self._conn.execute(f"ALTER TABLE manifest ADD COLUMN {col_name} {col_def}") # type: ignore[union-attr]
218223
logger.info("Migrated manifest DB from v0 to v%d (%d columns added)",
219224
SCHEMA_VERSION, sum(1 for c, _ in new_columns if c not in existing))
225+
self._rebuild_pk()
220226
self._conn.execute( # type: ignore[union-attr]
221227
"CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path)"
222228
)
@@ -225,10 +231,28 @@ def _run_migrations(self, from_version: int) -> None:
225231
"""Run incremental migrations from from_version to SCHEMA_VERSION."""
226232
for version, sql in _MIGRATIONS:
227233
if version > from_version:
228-
self._conn.execute(sql) # type: ignore[union-attr]
234+
with contextlib.suppress(sqlite3.OperationalError):
235+
self._conn.execute(sql) # type: ignore[union-attr]
236+
if from_version < 3:
237+
self._rebuild_pk()
229238
self._conn.execute(f"PRAGMA user_version={SCHEMA_VERSION}") # type: ignore[union-attr]
230239
self._conn.commit() # type: ignore[union-attr]
231240

241+
def _rebuild_pk(self) -> None:
242+
"""Rebuild the manifest table with the correct PK (asset_id, zone_id, asset_resource)."""
243+
conn = self._conn
244+
assert conn is not None
245+
conn.execute("ALTER TABLE manifest RENAME TO manifest_old")
246+
conn.executescript(_FRESH_SCHEMA)
247+
# Copy data, keeping only one row per (asset_id, zone_id, asset_resource)
248+
cols = _ALL_COLUMNS
249+
conn.execute(
250+
f"INSERT OR IGNORE INTO manifest ({cols}) "
251+
f"SELECT {cols} FROM manifest_old"
252+
)
253+
conn.execute("DROP TABLE manifest_old")
254+
conn.commit()
255+
232256
def close(self) -> None:
233257
"""Close the manifest DB, committing any pending writes."""
234258
if self._conn:
@@ -265,12 +289,12 @@ def __enter__(self) -> "ManifestDB":
265289
def __exit__(self, *_: object) -> None:
266290
self.close()
267291

268-
def lookup(self, asset_id: str, zone_id: str, local_path: str) -> ManifestRow | None:
292+
def lookup(self, asset_id: str, zone_id: str, asset_resource: str) -> ManifestRow | None:
269293
"""Look up a manifest entry by identity."""
270294
row = self._db.execute(
271295
f"SELECT {_ALL_COLUMNS} FROM manifest "
272-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
273-
(asset_id, zone_id, local_path),
296+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
297+
(asset_id, zone_id, asset_resource),
274298
).fetchone()
275299
if row is None:
276300
return None
@@ -293,6 +317,7 @@ def upsert(
293317
zone_id: str,
294318
local_path: str,
295319
version_size: int,
320+
asset_resource: str = "resOriginal",
296321
version_checksum: str | None = None,
297322
change_tag: str | None = None,
298323
item_type: str | None = None,
@@ -326,7 +351,7 @@ def upsert(
326351
"""Insert or update a manifest entry. Auto-flushes every 500 writes."""
327352
now = datetime.now(tz=timezone.utc).isoformat()
328353
params = (
329-
asset_id, zone_id, local_path, version_size, version_checksum,
354+
asset_id, zone_id, asset_resource, local_path, version_size, version_checksum,
330355
change_tag, now, now, item_type, filename,
331356
asset_date, added_date, is_favorite, is_hidden, is_deleted,
332357
original_width, original_height, duration, orientation,
@@ -336,8 +361,9 @@ def upsert(
336361
)
337362
sql = (
338363
f"INSERT INTO manifest ({_ALL_COLUMNS}) "
339-
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
340-
"ON CONFLICT(asset_id, zone_id, local_path) DO UPDATE SET "
364+
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
365+
"ON CONFLICT(asset_id, zone_id, asset_resource) DO UPDATE SET "
366+
"local_path=excluded.local_path, "
341367
"version_size=excluded.version_size, "
342368
"version_checksum=excluded.version_checksum, "
343369
"change_tag=excluded.change_tag, "
@@ -392,21 +418,21 @@ def upsert(
392418
logger.warning("Manifest write failed for %s: %s", local_path, e)
393419
return
394420

395-
def update_path(self, asset_id: str, zone_id: str, old_path: str, new_path: str) -> None:
421+
def update_path(self, asset_id: str, zone_id: str, asset_resource: str, new_path: str) -> None:
396422
"""Update local_path for an existing manifest entry."""
397423
try:
398424
self._db.execute(
399425
"UPDATE manifest SET local_path = ?, last_updated_at = ? "
400-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
426+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
401427
(new_path, datetime.now(tz=timezone.utc).isoformat(),
402-
asset_id, zone_id, old_path),
428+
asset_id, zone_id, asset_resource),
403429
)
404430
self._dirty = True
405431
self._pending_count += 1
406432
except sqlite3.Error as e:
407433
logger.warning(
408434
"Manifest path update failed for %s -> %s: %s",
409-
old_path, new_path, e,
435+
asset_resource, new_path, e,
410436
)
411437

412438
def count_by_path(self, local_path: str) -> int:
@@ -417,11 +443,11 @@ def count_by_path(self, local_path: str) -> int:
417443
).fetchone()
418444
return row[0] if row else 0
419445

420-
def remove(self, asset_id: str, zone_id: str, local_path: str) -> None:
446+
def remove(self, asset_id: str, zone_id: str, asset_resource: str) -> None:
421447
"""Remove a manifest entry."""
422448
self._db.execute(
423-
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
424-
(asset_id, zone_id, local_path),
449+
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
450+
(asset_id, zone_id, asset_resource),
425451
)
426452
self._dirty = True
427453

src/pyicloud_ipd/version_size.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,22 @@ def __str__(self) -> str:
2222

2323

2424
VersionSize = AssetVersionSize | LivePhotoVersionSize
25+
26+
27+
# Maps VersionSize enums to Apple's iCloud resource field prefixes.
28+
# These prefixes are stable identifiers from the CPLMaster record.
29+
_VERSION_TO_RESOURCE: dict[VersionSize, str] = {
30+
AssetVersionSize.ORIGINAL: "resOriginal",
31+
AssetVersionSize.ALTERNATIVE: "resOriginalAlt",
32+
AssetVersionSize.ADJUSTED: "resJPEGFull",
33+
AssetVersionSize.MEDIUM: "resJPEGMed",
34+
AssetVersionSize.THUMB: "resJPEGThumb",
35+
LivePhotoVersionSize.ORIGINAL: "resOriginalVidCompl",
36+
LivePhotoVersionSize.MEDIUM: "resVidMed",
37+
LivePhotoVersionSize.THUMB: "resVidSmall",
38+
}
39+
40+
41+
def version_to_resource(version_size: VersionSize) -> str:
42+
"""Map a VersionSize enum to its Apple resource field prefix."""
43+
return _VERSION_TO_RESOURCE[version_size]

0 commit comments

Comments
 (0)