Skip to content

Commit 4cf2db1

Browse files
teh-hippoCopilot
andcommitted
feat: add --delete-orphaned flag for cleaning up files no longer in iCloud
Detects manifest entries whose asset_id was not returned by the API during a complete successful run. Without the flag, logs a warning with the orphan count. With the flag, deletes orphaned files, XMP sidecars, and manifest entries. Handles: deleted photos, photos moved between libraries (Personal to Shared or vice versa), and any asset the API stops returning. Safety: only runs after complete iteration (interrupted runs skip the check entirely). Respects --dry-run. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent aaa4565 commit 4cf2db1

File tree

6 files changed

+313
-0
lines changed

6 files changed

+313
-0
lines changed

src/icloudpd/base.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,6 +1403,57 @@ def asset_type_skip_message(
14031403
return f"Skipping {filename}, only downloading {photo_video_phrase}. (Item type was: {photo.item_type})"
14041404

14051405

1406+
def delete_orphaned(
    logger: logging.Logger,
    manifest: "ManifestDB",
    directory: str,
    seen_asset_ids: set[str],
    delete: bool,
    dry_run: bool,
) -> None:
    """Check for orphaned manifest entries and optionally delete them.

    An orphan is any row returned by ``manifest.find_orphaned(seen_asset_ids)``.

    Args:
        logger: Destination for progress, warning and summary messages.
        manifest: Manifest providing ``find_orphaned``, ``remove`` and ``flush``.
        directory: Base directory that each orphan's ``local_path`` is joined to.
        seen_asset_ids: Asset ids observed during the run that just finished.
        delete: When false, only a warning with the orphan counts is logged.
        dry_run: When true (and ``delete`` is true), log what would be deleted
            without touching the filesystem or the manifest.

    A manifest entry is removed only when its file and ``.xmp`` sidecar are
    gone afterwards (deleted here or already missing). If a deletion fails the
    entry is kept, so the file stays tracked and is retried on a later run.

    NOTE(review): correctness depends on ``seen_asset_ids`` covering the whole
    library. Confirm the caller does not invoke this after an enumeration that
    stopped early for reasons other than cancellation.
    """
    orphans = manifest.find_orphaned(seen_asset_ids)
    if not orphans:
        return

    orphan_count = len(orphans)
    # Several manifest rows (resources) may belong to a single asset.
    unique_assets = len({o.asset_id for o in orphans})

    if not delete:
        logger.warning(
            "Found %d orphaned files (%d assets) not in iCloud library. "
            "Use --delete-orphaned to clean up.",
            orphan_count,
            unique_assets,
        )
        return

    logger.info(
        "Deleting %d orphaned files (%d assets) not in iCloud library...",
        orphan_count,
        unique_assets,
    )
    deleted = 0
    failed = 0
    for orphan in orphans:
        file_path = os.path.join(directory, orphan.local_path)
        if dry_run:
            logger.info("[DRY RUN] Would delete orphaned %s", file_path)
            # In dry-run mode the counter reflects entries that would be removed.
            deleted += 1
            continue

        for path in (file_path, file_path + ".xmp"):
            try:
                # EAFP: attempt the removal directly instead of checking for
                # existence first, which would race with other processes.
                os.remove(path)
            except FileNotFoundError:
                # Already gone; nothing to delete for this path.
                continue
            except OSError as e:
                logger.warning("Failed to delete %s: %s", path, e)
                failed += 1
                # Stop here so the file and its sidecar are not split up, and
                # so the manifest entry below is left in place.
                break
            logger.info("Deleted orphaned %s", path)
        else:
            # Reached only when no deletion failed for this orphan.
            manifest.remove(orphan.asset_id, orphan.zone_id, orphan.asset_resource)
            deleted += 1

    if not dry_run:
        manifest.flush()
    if failed:
        logger.warning(
            "%d orphaned entries could not be deleted and remain in the manifest",
            failed,
        )
    logger.info("Orphan cleanup complete: %d entries removed", deleted)
1455+
1456+
14061457
def core_single_run(
14071458
logger: logging.Logger,
14081459
status_exchange: StatusExchange,
@@ -1523,6 +1574,8 @@ def sum_(inp: Iterable[int]) -> int:
15231574
return sum(inp)
15241575

15251576
photos_count: int | None = compose(sum_, album_lengths)(albums)
1577+
seen_asset_ids: set[str] = set()
1578+
iteration_complete = False
15261579
for photo_album in albums:
15271580
photos_enumerator: Iterable[PhotoAsset] = photo_album
15281581

@@ -1607,6 +1660,7 @@ def should_break(counter: Counter) -> bool:
16071660

16081661
for item in photos_bar:
16091662
try:
1663+
seen_asset_ids.add(item.id)
16101664
if should_break(consecutive_files_found):
16111665
logger.info(
16121666
"Found %s consecutive previously downloaded photos. Exiting",
@@ -1712,6 +1766,8 @@ def should_break(counter: Counter) -> bool:
17121766
if manifest is not None:
17131767
manifest.flush()
17141768

1769+
iteration_complete = not status_exchange.get_progress().cancel
1770+
17151771
if user_config.auto_delete:
17161772
autodelete_photos(
17171773
logger,
@@ -1726,6 +1782,16 @@ def should_break(counter: Counter) -> bool:
17261782
)
17271783
else:
17281784
pass
1785+
1786+
if manifest is not None and iteration_complete:
1787+
delete_orphaned(
1788+
logger,
1789+
manifest,
1790+
directory,
1791+
seen_asset_ids,
1792+
delete=user_config.delete_orphaned,
1793+
dry_run=user_config.dry_run,
1794+
)
17291795
except PyiCloudFailedLoginException as error:
17301796
logger.info(error)
17311797
dump_responses(logger.debug, captured_responses)

src/icloudpd/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,12 @@ def add_options_for_user(parser: argparse.ArgumentParser) -> argparse.ArgumentPa
147147
+ "(If you restore the photo in iCloud, it will be downloaded again.)",
148148
action="store_true",
149149
)
150+
cloned.add_argument(
151+
"--delete-orphaned",
152+
help="Delete local files that are no longer in the iCloud library "
153+
+ "(e.g. deleted from iCloud or moved to another library).",
154+
action="store_true",
155+
)
150156
cloned.add_argument(
151157
"--folder-structure",
152158
help="Folder structure. If set to `none`, all photos will be placed into the download directory. Default: %(default)s",
@@ -545,6 +551,7 @@ def map_to_config(user_ns: argparse.Namespace) -> UserConfig:
545551
skip_live_photos=user_ns.skip_live_photos,
546552
force_size=user_ns.force_size,
547553
auto_delete=user_ns.auto_delete,
554+
delete_orphaned=user_ns.delete_orphaned,
548555
folder_structure=user_ns.folder_structure,
549556
set_exif_datetime=user_ns.set_exif_datetime,
550557
write_metadata_xmp=write_metadata_xmp,

src/icloudpd/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class _DefaultConfig:
2929
skip_live_photos: bool
3030
force_size: bool
3131
auto_delete: bool
32+
delete_orphaned: bool
3233
folder_structure: str
3334
set_exif_datetime: bool
3435
write_metadata_xmp: frozenset[str]

src/icloudpd/manifest.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,3 +485,18 @@ def count(self) -> int:
485485
"""Return the total number of manifest entries."""
486486
row = self._db.execute("SELECT COUNT(*) FROM manifest").fetchone()
487487
return row[0] if row else 0
488+
489+
def find_orphaned(self, seen_asset_ids: set[str]) -> list[ManifestRow]:
    """Return manifest rows whose asset_id is not in the seen set.

    Args:
        seen_asset_ids: Asset ids that should be treated as still present.

    Returns:
        One ``ManifestRow`` per manifest row whose ``asset_id`` is missing
        from ``seen_asset_ids``; an empty list when there are none.
    """
    cursor = self._db.execute("SELECT * FROM manifest")
    # Read the column names from the cursor that yields the rows, so no
    # second query is needed and the names always match the row layout.
    col_names = [desc[0] for desc in cursor.description or []]
    orphans: list[ManifestRow] = []
    # Iterate the cursor directly rather than loading every row up front.
    for row in cursor:
        row_dict = dict(zip(col_names, row, strict=False))
        if row_dict["asset_id"] not in seen_asset_ids:
            orphans.append(ManifestRow(**row_dict))
    return orphans

tests/test_cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ def test_cli_parser(self) -> None:
213213
skip_live_photos=False,
214214
force_size=False,
215215
auto_delete=False,
216+
delete_orphaned=False,
216217
folder_structure="{:%Y/%m/%d}",
217218
set_exif_datetime=False,
218219
write_metadata_xmp=frozenset(),
@@ -255,6 +256,7 @@ def test_cli_parser(self) -> None:
255256
skip_live_photos=False,
256257
force_size=False,
257258
auto_delete=False,
259+
delete_orphaned=False,
258260
folder_structure="{:%Y/%m/%d}",
259261
set_exif_datetime=False,
260262
write_metadata_xmp=frozenset(),
@@ -333,6 +335,7 @@ def test_cli_parser(self) -> None:
333335
skip_live_photos=False,
334336
force_size=False,
335337
auto_delete=False,
338+
delete_orphaned=False,
336339
folder_structure="{:%Y/%m/%d}",
337340
set_exif_datetime=False,
338341
write_metadata_xmp=frozenset(),

tests/test_delete_orphaned.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""Tests for delete_orphaned functionality."""
2+
3+
import logging
4+
import os
5+
import sqlite3
6+
import tempfile
7+
from unittest.mock import MagicMock
8+
9+
import pytest
10+
11+
from icloudpd.base import delete_orphaned
12+
from icloudpd.manifest import ManifestDB
13+
14+
15+
def _setup_manifest_with_files(
    tmpdir: str,
    entries: list[dict[str, str]],
) -> ManifestDB:
    """Build an opened manifest under ``tmpdir`` with matching files on disk.

    For every entry a media file and an ``.xmp`` sidecar are written below
    ``tmpdir`` and a manifest row is upserted. ``zone_id`` defaults to
    ``"PrimarySync"`` and ``asset_resource`` to ``"resOriginal"``. The manifest
    is flushed and returned still open; the caller closes it.
    """
    db = ManifestDB(tmpdir)
    db.open()
    for spec in entries:
        asset_id, zone_id, rel_path, resource = (
            spec["asset_id"],
            spec.get("zone_id", "PrimarySync"),
            spec["local_path"],
            spec.get("asset_resource", "resOriginal"),
        )
        target = os.path.join(tmpdir, rel_path)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        # Media file first, then its XMP sidecar.
        payloads = (
            (target, "test content"),
            (target + ".xmp", "<xmp>test</xmp>"),
        )
        for path, text in payloads:
            with open(path, "w") as handle:
                handle.write(text)
        # Record the entry in the manifest.
        db.upsert(
            asset_id=asset_id,
            zone_id=zone_id,
            local_path=rel_path,
            version_size=1,
            asset_resource=resource,
        )
    db.flush()
    return db
45+
46+
47+
class TestFindOrphaned:
    """Exercise ManifestDB.find_orphaned() against small manifests."""

    def test_no_orphans(self, tmp_path: object) -> None:
        """Every manifest asset was seen, so nothing is reported."""
        db = _setup_manifest_with_files(
            str(tmp_path),
            [
                {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
                {"asset_id": "A2", "local_path": "2024-01/photo2.HEIC"},
            ],
        )
        found = db.find_orphaned({"A1", "A2"})
        assert len(found) == 0
        db.close()

    def test_all_orphaned(self, tmp_path: object) -> None:
        """An empty seen set makes every manifest row an orphan."""
        db = _setup_manifest_with_files(
            str(tmp_path),
            [
                {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
                {"asset_id": "A2", "local_path": "2024-01/photo2.HEIC"},
            ],
        )
        nothing_seen: set[str] = set()
        found = db.find_orphaned(nothing_seen)
        assert len(found) == 2
        assert {row.asset_id for row in found} == {"A1", "A2"}
        db.close()

    def test_partial_orphan(self, tmp_path: object) -> None:
        """Only the asset missing from the seen set is reported."""
        db = _setup_manifest_with_files(
            str(tmp_path),
            [
                {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
                {"asset_id": "A2", "local_path": "2024-01/photo2.HEIC"},
                {"asset_id": "A3", "local_path": "2024-01/photo3.HEIC"},
            ],
        )
        found = db.find_orphaned({"A1", "A3"})
        assert len(found) == 1
        assert found[0].asset_id == "A2"
        db.close()

    def test_multi_resource_orphan(self, tmp_path: object) -> None:
        """An asset with multiple resources (HEIC + MOV) should return all rows."""
        db = _setup_manifest_with_files(
            str(tmp_path),
            [
                {
                    "asset_id": "A1",
                    "local_path": "2024-01/IMG_001.HEIC",
                    "asset_resource": "resOriginal",
                },
                {
                    "asset_id": "A1",
                    "local_path": "2024-01/IMG_001_HEVC.MOV",
                    "asset_resource": "resOriginalVidCompl",
                },
                {
                    "asset_id": "A2",
                    "local_path": "2024-01/IMG_002.HEIC",
                    "asset_resource": "resOriginal",
                },
            ],
        )
        found = db.find_orphaned({"A2"})
        assert len(found) == 2
        assert {row.asset_id for row in found} == {"A1"}
        db.close()

    def test_empty_manifest(self, tmp_path: object) -> None:
        """A manifest without rows never yields orphans."""
        db = ManifestDB(str(tmp_path))
        db.open()
        found = db.find_orphaned({"A1", "A2"})
        assert len(found) == 0
        db.close()
107+
108+
109+
class TestDeleteOrphaned:
    """Tests for the delete_orphaned() function."""

    def test_warning_mode_no_flag(self, tmp_path: object, caplog) -> None:
        """Without --delete-orphaned, logs warning but doesn't delete."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
        ])
        logger = logging.getLogger("test")
        # Call without delete flag, capturing log output to verify the warning
        with caplog.at_level(logging.WARNING, logger="test"):
            delete_orphaned(logger, manifest, tmpdir, set(), delete=False, dry_run=False)
        assert any(
            record.levelno == logging.WARNING
            and "Use --delete-orphaned" in record.getMessage()
            for record in caplog.records
        )
        # File should still exist
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC"))
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC.xmp"))
        # Manifest should still have the entry
        assert manifest.count() == 1
        manifest.close()

    def test_delete_mode(self, tmp_path: object) -> None:
        """With --delete-orphaned, files and manifest entries are removed."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
            {"asset_id": "A2", "local_path": "2024-01/photo2.HEIC"},
        ])
        logger = logging.getLogger("test")
        delete_orphaned(logger, manifest, tmpdir, {"A2"}, delete=True, dry_run=False)
        # A1 should be deleted
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC"))
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC.xmp"))
        # A2 should remain
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo2.HEIC"))
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo2.HEIC.xmp"))
        # Manifest should only have A2
        assert manifest.count() == 1
        manifest.close()

    def test_dry_run_skips_deletion(self, tmp_path: object) -> None:
        """Dry run logs but doesn't delete files or manifest entries."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
        ])
        logger = logging.getLogger("test")
        delete_orphaned(logger, manifest, tmpdir, set(), delete=True, dry_run=True)
        # File should still exist
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC"))
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC.xmp"))
        # Manifest should still have the entry
        assert manifest.count() == 1
        manifest.close()

    def test_multi_resource_cleanup(self, tmp_path: object) -> None:
        """Deleting orphaned asset removes all resources (HEIC + MOV + XMPs)."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/IMG.HEIC", "asset_resource": "resOriginal"},
            {"asset_id": "A1", "local_path": "2024-01/IMG_HEVC.MOV", "asset_resource": "resOriginalVidCompl"},
        ])
        logger = logging.getLogger("test")
        delete_orphaned(logger, manifest, tmpdir, set(), delete=True, dry_run=False)
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/IMG.HEIC"))
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/IMG.HEIC.xmp"))
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/IMG_HEVC.MOV"))
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/IMG_HEVC.MOV.xmp"))
        assert manifest.count() == 0
        manifest.close()

    def test_no_orphans_no_action(self, tmp_path: object) -> None:
        """When all assets are seen, no warning or deletion."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
        ])
        logger = logging.getLogger("test")
        delete_orphaned(logger, manifest, tmpdir, {"A1"}, delete=True, dry_run=False)
        assert os.path.exists(os.path.join(tmpdir, "2024-01/photo1.HEIC"))
        assert manifest.count() == 1
        manifest.close()

    def test_file_already_missing(self, tmp_path: object) -> None:
        """Orphan in manifest but file already deleted from disk."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/photo1.HEIC"},
        ])
        # Remove the file manually
        os.remove(os.path.join(tmpdir, "2024-01/photo1.HEIC"))
        os.remove(os.path.join(tmpdir, "2024-01/photo1.HEIC.xmp"))
        logger = logging.getLogger("test")
        # Should not crash, just remove manifest entry
        delete_orphaned(logger, manifest, tmpdir, set(), delete=True, dry_run=False)
        assert manifest.count() == 0
        manifest.close()

    def test_dedup_suffix_files(self, tmp_path: object) -> None:
        """Orphan with dedup suffix in path is handled correctly."""
        tmpdir = str(tmp_path)
        manifest = _setup_manifest_with_files(tmpdir, [
            {"asset_id": "A1", "local_path": "2024-01/IMG_001_aB3x.HEIC"},
        ])
        logger = logging.getLogger("test")
        delete_orphaned(logger, manifest, tmpdir, set(), delete=True, dry_run=False)
        assert not os.path.exists(os.path.join(tmpdir, "2024-01/IMG_001_aB3x.HEIC"))
        assert manifest.count() == 0
        manifest.close()
218+
219+
220+
# Python 3.10 doesn't have contextlib.nullcontext with args
221+
from contextlib import nullcontext

0 commit comments

Comments
 (0)