Skip to content

Commit c29c707

Browse files
teh-hippoCopilot
andcommitted
feat: change manifest PK from local_path to asset_resource
Replace PK (asset_id, zone_id, local_path) with (asset_id, zone_id, asset_resource) where asset_resource is the Apple iCloud resource field prefix (e.g. resOriginal, resOriginalVidCompl). This makes local_path an updateable column, so config changes like toggling --keep-unicode-in-filenames update the existing manifest row instead of creating a duplicate. - Add asset_resource column (schema v3) - Add version_to_resource() mapping in version_size.py - Update all manifest.lookup/upsert/remove calls in base.py - Migration: v0/v1/v2 DBs get asset_resource via ALTER + table rebuild - 7 new unit tests for asset_resource PK behaviour Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent dac97f8 commit c29c707

File tree

4 files changed

+229
-46
lines changed

4 files changed

+229
-46
lines changed

src/icloudpd/base.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
size_to_suffix,
9393
store_password_in_keyring,
9494
)
95-
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize
95+
from pyicloud_ipd.version_size import AssetVersionSize, LivePhotoVersionSize, version_to_resource
9696

9797
freeze_support() # fmt: skip # fixing tqdm on macos
9898

@@ -128,6 +128,7 @@ def _check_collision(
128128
directory: str,
129129
dir_cache: "DirCache",
130130
logger: Logger,
131+
asset_resource: str = "resOriginal",
131132
) -> tuple[str, str, bool, str | None]:
132133
"""Check if another asset owns this path and self-heal if needed.
133134
@@ -140,7 +141,7 @@ def _check_collision(
140141
dedup_suffix = f"_{suffix}"
141142
new_download_path = add_suffix_to_filename(dedup_suffix, download_path)
142143
new_rel_path = os.path.relpath(new_download_path, directory)
143-
manifest.update_path(photo_id, manifest.zone_id, rel_path, new_rel_path)
144+
manifest.update_path(photo_id, manifest.zone_id, asset_resource, new_rel_path)
144145
file_exists = dir_cache.isfile(new_download_path)
145146
logger.debug(
146147
"Collision on %s (owned by %s), deduping %s to %s",
@@ -912,16 +913,18 @@ def download_builder(
912913
# Compute relative path for manifest — use the actual file path on disk
913914
actual_path = original_download_path if original_download_path and file_exists else download_path
914915
rel_path = os.path.relpath(actual_path, directory) if manifest else ""
916+
asset_res = version_to_resource(download_size)
915917

916918
dedup_download = False
917919
if file_exists:
918920
if manifest is not None:
919921
# Identity-based matching: check manifest for this asset
920-
manifest_row = manifest.lookup(photo.id, manifest.zone_id, rel_path)
922+
manifest_row = manifest.lookup(photo.id, manifest.zone_id, asset_res)
921923
if manifest_row is not None:
922924
# Check if this asset actually owns the file
923925
rel_path, download_path, file_exists, dedup_suffix = _check_collision(
924926
manifest, photo.id, rel_path, download_path, directory, dir_cache, logger,
927+
asset_resource=asset_res,
925928
)
926929
dedup_download = dedup_suffix is not None and not file_exists
927930
if dedup_suffix is None and manifest_row.version_size != version.size:
@@ -951,6 +954,7 @@ def download_builder(
951954
zone_id=manifest.zone_id,
952955
local_path=rel_path,
953956
version_size=version.size,
957+
asset_resource=asset_res,
954958
**meta,
955959
)
956960
else:
@@ -971,6 +975,7 @@ def download_builder(
971975
zone_id=manifest.zone_id,
972976
local_path=rel_path,
973977
version_size=version.size,
978+
asset_resource=asset_res,
974979
**meta,
975980
)
976981
logger.debug(
@@ -987,6 +992,7 @@ def download_builder(
987992
zone_id=manifest.zone_id,
988993
local_path=rel_path,
989994
version_size=version.size,
995+
asset_resource=asset_res,
990996
**meta,
991997
)
992998
logger.debug(
@@ -1063,6 +1069,7 @@ def download_builder(
10631069
zone_id=manifest.zone_id,
10641070
local_path=rel_path,
10651071
version_size=version.size,
1072+
asset_resource=asset_res,
10661073
**meta,
10671074
)
10681075
logger.info("Downloaded %s", truncated_path)
@@ -1078,6 +1085,7 @@ def download_builder(
10781085
# Also download the live photo if present
10791086
if not skip_live_photos:
10801087
lp_size = live_photo_size
1088+
lp_res = version_to_resource(lp_size)
10811089
photo_versions_with_policy = photo.versions_with_raw_policy(raw_policy)
10821090
if lp_size in photo_versions_with_policy:
10831091
version = photo_versions_with_policy[lp_size]
@@ -1108,7 +1116,7 @@ def download_builder(
11081116
if only_print_filenames:
11091117
if lp_file_exists and manifest is not None:
11101118
# Check if version changed — need to print the filename for re-download
1111-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1119+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11121120
if lp_manifest_row is not None and lp_manifest_row.version_size != version.size:
11131121
lp_file_exists = False
11141122
if not lp_file_exists:
@@ -1127,11 +1135,12 @@ def download_builder(
11271135
else:
11281136
if lp_file_exists:
11291137
if manifest is not None:
1130-
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_rel_path)
1138+
lp_manifest_row = manifest.lookup(photo.id, manifest.zone_id, lp_res)
11311139
if lp_manifest_row is not None:
11321140
# Check if this asset owns the LP file
11331141
lp_rel_path, lp_download_path, lp_file_exists, lp_dedup = _check_collision(
11341142
manifest, photo.id, lp_rel_path, lp_download_path, directory, dir_cache, logger,
1143+
asset_resource=lp_res,
11351144
)
11361145
if lp_dedup is not None:
11371146
# Collision handled — propagate suffix
@@ -1153,6 +1162,7 @@ def download_builder(
11531162
zone_id=manifest.zone_id,
11541163
local_path=lp_rel_path,
11551164
version_size=version.size,
1165+
asset_resource=lp_res,
11561166
**meta,
11571167
)
11581168
else:
@@ -1171,6 +1181,7 @@ def download_builder(
11711181
zone_id=manifest.zone_id,
11721182
local_path=lp_rel_path,
11731183
version_size=version.size,
1184+
asset_resource=lp_res,
11741185
**meta,
11751186
)
11761187
logger.debug(
@@ -1185,6 +1196,7 @@ def download_builder(
11851196
zone_id=manifest.zone_id,
11861197
local_path=lp_rel_path,
11871198
version_size=version.size,
1199+
asset_resource=lp_res,
11881200
**meta,
11891201
)
11901202
elif file_match_policy == FileMatchPolicy.NAME_SIZE_DEDUP_WITH_SUFFIX:
@@ -1223,6 +1235,7 @@ def download_builder(
12231235
zone_id=manifest.zone_id,
12241236
local_path=lp_rel_path,
12251237
version_size=version.size,
1238+
asset_resource=lp_res,
12261239
**meta,
12271240
)
12281241
logger.info("Downloaded %s", truncated_path)

src/icloudpd/manifest.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@
1919

2020
logger = logging.getLogger(__name__)
2121

22-
SCHEMA_VERSION = 2
22+
SCHEMA_VERSION = 3
2323

2424
_FRESH_SCHEMA = """\
2525
CREATE TABLE IF NOT EXISTS manifest (
2626
asset_id TEXT NOT NULL,
2727
zone_id TEXT NOT NULL DEFAULT '',
28+
asset_resource TEXT NOT NULL DEFAULT 'resOriginal',
2829
local_path TEXT NOT NULL,
2930
version_size INTEGER NOT NULL,
3031
version_checksum TEXT,
@@ -58,7 +59,7 @@
5859
burst_id TEXT,
5960
original_orientation INTEGER,
6061
raw_fields TEXT,
61-
PRIMARY KEY (asset_id, zone_id, local_path)
62+
PRIMARY KEY (asset_id, zone_id, asset_resource)
6263
);
6364
CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path);
6465
"""
@@ -76,6 +77,7 @@
7677
(2, "ALTER TABLE manifest ADD COLUMN burst_id TEXT"),
7778
(2, "ALTER TABLE manifest ADD COLUMN original_orientation INTEGER"),
7879
(2, "ALTER TABLE manifest ADD COLUMN raw_fields TEXT"),
80+
(3, "ALTER TABLE manifest ADD COLUMN asset_resource TEXT NOT NULL DEFAULT 'resOriginal'"),
7981
]
8082

8183

@@ -85,6 +87,7 @@ class ManifestRow:
8587

8688
asset_id: str
8789
zone_id: str
90+
asset_resource: str
8891
local_path: str
8992
version_size: int
9093
version_checksum: str | None
@@ -121,7 +124,7 @@ class ManifestRow:
121124

122125

123126
_ALL_COLUMNS = (
124-
"asset_id, zone_id, local_path, version_size, version_checksum, "
127+
"asset_id, zone_id, asset_resource, local_path, version_size, version_checksum, "
125128
"change_tag, downloaded_at, last_updated_at, item_type, filename, "
126129
"asset_date, added_date, is_favorite, is_hidden, is_deleted, "
127130
"original_width, original_height, duration, orientation, "
@@ -211,12 +214,14 @@ def _migrate_from_v0(self) -> None:
211214
("burst_id", "TEXT"),
212215
("original_orientation", "INTEGER"),
213216
("raw_fields", "TEXT"),
217+
("asset_resource", "TEXT NOT NULL DEFAULT 'resOriginal'"),
214218
]
215219
for col_name, col_def in new_columns:
216220
if col_name not in existing:
217221
self._conn.execute(f"ALTER TABLE manifest ADD COLUMN {col_name} {col_def}") # type: ignore[union-attr]
218222
logger.info("Migrated manifest DB from v0 to v%d (%d columns added)",
219223
SCHEMA_VERSION, sum(1 for c, _ in new_columns if c not in existing))
224+
self._rebuild_pk()
220225
self._conn.execute( # type: ignore[union-attr]
221226
"CREATE INDEX IF NOT EXISTS idx_manifest_path ON manifest(local_path)"
222227
)
@@ -225,10 +230,30 @@ def _run_migrations(self, from_version: int) -> None:
225230
"""Run incremental migrations from from_version to SCHEMA_VERSION."""
226231
for version, sql in _MIGRATIONS:
227232
if version > from_version:
228-
self._conn.execute(sql) # type: ignore[union-attr]
233+
try:
234+
self._conn.execute(sql) # type: ignore[union-attr]
235+
except sqlite3.OperationalError:
236+
pass # column already exists
237+
if from_version < 3:
238+
self._rebuild_pk()
229239
self._conn.execute(f"PRAGMA user_version={SCHEMA_VERSION}") # type: ignore[union-attr]
230240
self._conn.commit() # type: ignore[union-attr]
231241

242+
def _rebuild_pk(self) -> None:
243+
"""Rebuild the manifest table with the correct PK (asset_id, zone_id, asset_resource)."""
244+
conn = self._conn
245+
assert conn is not None
246+
conn.execute("ALTER TABLE manifest RENAME TO manifest_old")
247+
conn.executescript(_FRESH_SCHEMA)
248+
# Copy data, keeping only one row per (asset_id, zone_id, asset_resource)
249+
cols = _ALL_COLUMNS
250+
conn.execute(
251+
f"INSERT OR IGNORE INTO manifest ({cols}) "
252+
f"SELECT {cols} FROM manifest_old"
253+
)
254+
conn.execute("DROP TABLE manifest_old")
255+
conn.commit()
256+
232257
def close(self) -> None:
233258
"""Close the manifest DB, committing any pending writes."""
234259
if self._conn:
@@ -265,12 +290,12 @@ def __enter__(self) -> "ManifestDB":
265290
def __exit__(self, *_: object) -> None:
266291
self.close()
267292

268-
def lookup(self, asset_id: str, zone_id: str, local_path: str) -> ManifestRow | None:
293+
def lookup(self, asset_id: str, zone_id: str, asset_resource: str) -> ManifestRow | None:
269294
"""Look up a manifest entry by identity."""
270295
row = self._db.execute(
271296
f"SELECT {_ALL_COLUMNS} FROM manifest "
272-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
273-
(asset_id, zone_id, local_path),
297+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
298+
(asset_id, zone_id, asset_resource),
274299
).fetchone()
275300
if row is None:
276301
return None
@@ -293,6 +318,7 @@ def upsert(
293318
zone_id: str,
294319
local_path: str,
295320
version_size: int,
321+
asset_resource: str = "resOriginal",
296322
version_checksum: str | None = None,
297323
change_tag: str | None = None,
298324
item_type: str | None = None,
@@ -326,7 +352,7 @@ def upsert(
326352
"""Insert or update a manifest entry. Auto-flushes every 500 writes."""
327353
now = datetime.now(tz=timezone.utc).isoformat()
328354
params = (
329-
asset_id, zone_id, local_path, version_size, version_checksum,
355+
asset_id, zone_id, asset_resource, local_path, version_size, version_checksum,
330356
change_tag, now, now, item_type, filename,
331357
asset_date, added_date, is_favorite, is_hidden, is_deleted,
332358
original_width, original_height, duration, orientation,
@@ -336,8 +362,9 @@ def upsert(
336362
)
337363
sql = (
338364
f"INSERT INTO manifest ({_ALL_COLUMNS}) "
339-
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
340-
"ON CONFLICT(asset_id, zone_id, local_path) DO UPDATE SET "
365+
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
366+
"ON CONFLICT(asset_id, zone_id, asset_resource) DO UPDATE SET "
367+
"local_path=excluded.local_path, "
341368
"version_size=excluded.version_size, "
342369
"version_checksum=excluded.version_checksum, "
343370
"change_tag=excluded.change_tag, "
@@ -392,14 +419,14 @@ def upsert(
392419
logger.warning("Manifest write failed for %s: %s", local_path, e)
393420
return
394421

395-
def update_path(self, asset_id: str, zone_id: str, old_path: str, new_path: str) -> None:
422+
def update_path(self, asset_id: str, zone_id: str, asset_resource: str, new_path: str) -> None:
396423
"""Update local_path for an existing manifest entry."""
397424
try:
398425
self._db.execute(
399426
"UPDATE manifest SET local_path = ?, last_updated_at = ? "
400-
"WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
427+
"WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
401428
(new_path, datetime.now(tz=timezone.utc).isoformat(),
402-
asset_id, zone_id, old_path),
429+
asset_id, zone_id, asset_resource),
403430
)
404431
self._dirty = True
405432
self._pending_count += 1
@@ -417,11 +444,11 @@ def count_by_path(self, local_path: str) -> int:
417444
).fetchone()
418445
return row[0] if row else 0
419446

420-
def remove(self, asset_id: str, zone_id: str, local_path: str) -> None:
447+
def remove(self, asset_id: str, zone_id: str, asset_resource: str) -> None:
421448
"""Remove a manifest entry."""
422449
self._db.execute(
423-
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND local_path = ?",
424-
(asset_id, zone_id, local_path),
450+
"DELETE FROM manifest WHERE asset_id = ? AND zone_id = ? AND asset_resource = ?",
451+
(asset_id, zone_id, asset_resource),
425452
)
426453
self._dirty = True
427454

src/pyicloud_ipd/version_size.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,22 @@ def __str__(self) -> str:
2222

2323

2424
VersionSize = AssetVersionSize | LivePhotoVersionSize
25+
26+
27+
# Maps VersionSize enums to Apple's iCloud resource field prefixes.
28+
# These prefixes are stable identifiers from the CPLMaster record.
29+
_VERSION_TO_RESOURCE: dict[VersionSize, str] = {
30+
AssetVersionSize.ORIGINAL: "resOriginal",
31+
AssetVersionSize.ALTERNATIVE: "resOriginalAlt",
32+
AssetVersionSize.ADJUSTED: "resJPEGFull",
33+
AssetVersionSize.MEDIUM: "resJPEGMed",
34+
AssetVersionSize.THUMB: "resJPEGThumb",
35+
LivePhotoVersionSize.ORIGINAL: "resOriginalVidCompl",
36+
LivePhotoVersionSize.MEDIUM: "resVidMed",
37+
LivePhotoVersionSize.THUMB: "resVidSmall",
38+
}
39+
40+
41+
def version_to_resource(version_size: VersionSize) -> str:
42+
"""Map a VersionSize enum to its Apple resource field prefix."""
43+
return _VERSION_TO_RESOURCE[version_size]

0 commit comments

Comments
 (0)