Skip to content

Commit 481fdde

Browse files
authored
chore: tighten duplicate argument validation in create() and vacuum() (delta-io#4299)
# Description Tighten the Python compatibility handling in `DeltaTable.create()` and `DeltaTable.vacuum()` ensuring duplicate values are rejected when legacy positional arguments are mixed with keywords. # Related Issue(s) <!--- For example: - closes #106 ---> # Documentation <!--- Share links to useful documentation ---> Signed-off-by: Ethan Urbanski <ethan@urbanskitech.com>
1 parent c85075e commit 481fdde

3 files changed

Lines changed: 100 additions & 3 deletions

File tree

python/deltalake/table.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
Literal,
1414
NamedTuple,
1515
Union,
16+
cast,
1617
)
1718

1819
from arro3.core import RecordBatchReader, Table
@@ -71,6 +72,21 @@
7172
PartitionFilterType = list[tuple[str, str, Union[str, list[str]]]]
7273

7374

75+
class _KeywordArgDefault:
76+
"""Sentinel that preserves the rendered default while tracking omission."""
77+
78+
def __init__(self, value: Any) -> None:
79+
self.value = value
80+
81+
def __repr__(self) -> str:
82+
return repr(self.value)
83+
84+
85+
_DEFAULT_TRUE = _KeywordArgDefault(True)
86+
_DEFAULT_FALSE = _KeywordArgDefault(False)
87+
_DEFAULT_NONE = _KeywordArgDefault(None)
88+
89+
7490
@dataclass(init=False)
7591
class Metadata:
7692
"""Create a Metadata instance."""
@@ -203,7 +219,7 @@ def create(
203219
*args: Any,
204220
commit_properties: CommitProperties | None = None,
205221
post_commithook_properties: PostCommitHookProperties | None = None,
206-
raise_if_key_not_exists: bool = True,
222+
raise_if_key_not_exists: bool = cast(bool, _DEFAULT_TRUE),
207223
) -> DeltaTable:
208224
"""`CREATE` or `CREATE_OR_REPLACE` a delta table given a table_uri.
209225
@@ -242,6 +258,9 @@ def create(
242258
)
243259
```
244260
"""
261+
raise_if_key_not_exists_is_default = (
262+
cast(Any, raise_if_key_not_exists) is _DEFAULT_TRUE
263+
)
245264
if args:
246265
warnings.warn(
247266
"Passing commit arguments positionally to create() is deprecated "
@@ -264,7 +283,14 @@ def create(
264283
)
265284
post_commithook_properties = args[1]
266285
if len(args) == 3:
286+
if not raise_if_key_not_exists_is_default:
287+
raise TypeError(
288+
"create() got multiple values for 'raise_if_key_not_exists'"
289+
)
267290
raise_if_key_not_exists = args[2]
291+
raise_if_key_not_exists_is_default = False
292+
if raise_if_key_not_exists_is_default:
293+
raise_if_key_not_exists = True
268294
if isinstance(partition_by, str):
269295
partition_by = [partition_by]
270296

@@ -576,8 +602,8 @@ def vacuum(
576602
*args: Any,
577603
commit_properties: CommitProperties | None = None,
578604
post_commithook_properties: PostCommitHookProperties | None = None,
579-
full: bool = False,
580-
keep_versions: list[int] | None = None,
605+
full: bool = cast(bool, _DEFAULT_FALSE),
606+
keep_versions: list[int] | None = cast(list[int] | None, _DEFAULT_NONE),
581607
) -> list[str]:
582608
"""
583609
Run the Vacuum command on the Delta Table: list and delete files no longer referenced by the Delta table.
@@ -596,6 +622,8 @@ def vacuum(
596622
Returns:
597623
the list of files no longer referenced by the Delta Table and are older than the retention threshold.
598624
"""
625+
full_is_default = cast(Any, full) is _DEFAULT_FALSE
626+
keep_versions_is_default = cast(Any, keep_versions) is _DEFAULT_NONE
599627
if args:
600628
warnings.warn(
601629
"Passing commit arguments positionally to vacuum() is deprecated "
@@ -618,9 +646,19 @@ def vacuum(
618646
)
619647
commit_properties = args[1]
620648
if len(args) >= 3:
649+
if not full_is_default:
650+
raise TypeError("vacuum() got multiple values for 'full'")
621651
full = args[2]
652+
full_is_default = False
622653
if len(args) == 4:
654+
if not keep_versions_is_default:
655+
raise TypeError("vacuum() got multiple values for 'keep_versions'")
623656
keep_versions = args[3]
657+
keep_versions_is_default = False
658+
if full_is_default:
659+
full = False
660+
if keep_versions_is_default:
661+
keep_versions = None
624662
if retention_hours:
625663
if retention_hours < 0:
626664
raise ValueError("The retention periods should be positive.")

python/tests/test_create.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,38 @@ def test_create_positional_and_keyword_commit_conflict_raises(
220220
commit,
221221
commit_properties=commit,
222222
)
223+
224+
225+
def test_create_positional_and_keyword_raise_if_key_not_exists_conflict_raises(
226+
tmp_path: pathlib.Path,
227+
monkeypatch,
228+
):
229+
import deltalake.table as table_module
230+
231+
def fake_create(*args):
232+
pass
233+
234+
def fake_init(self, *args, **kwargs):
235+
pass
236+
237+
monkeypatch.setattr(table_module, "_create_deltalake", fake_create)
238+
monkeypatch.setattr(DeltaTable, "__init__", fake_init)
239+
240+
commit = CommitProperties(custom_metadata={"userName": "John Doe"})
241+
with pytest.raises(
242+
TypeError, match="multiple values for 'raise_if_key_not_exists'"
243+
):
244+
DeltaTable.create(
245+
tmp_path,
246+
schema,
247+
"error",
248+
None,
249+
None,
250+
None,
251+
None,
252+
None,
253+
commit,
254+
None,
255+
False,
256+
raise_if_key_not_exists=True,
257+
)

python/tests/test_vacuum.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,3 +357,27 @@ def test_vacuum_positional_and_keyword_commit_conflict_raises():
357357
commit = CommitProperties(custom_metadata={"userName": "John Doe"})
358358
with pytest.raises(TypeError, match="multiple values for 'commit_properties'"):
359359
dt.vacuum(None, True, True, None, commit, commit_properties=commit)
360+
361+
362+
def test_vacuum_positional_and_keyword_full_conflict_raises():
363+
class StubTable:
364+
def vacuum(self, *args):
365+
return []
366+
367+
dt = object.__new__(DeltaTable)
368+
dt._table = StubTable()
369+
commit = CommitProperties(custom_metadata={"userName": "John Doe"})
370+
with pytest.raises(TypeError, match="multiple values for 'full'"):
371+
dt.vacuum(None, True, True, None, commit, False, full=True)
372+
373+
374+
def test_vacuum_positional_and_keyword_keep_versions_conflict_raises():
375+
class StubTable:
376+
def vacuum(self, *args):
377+
return []
378+
379+
dt = object.__new__(DeltaTable)
380+
dt._table = StubTable()
381+
commit = CommitProperties(custom_metadata={"userName": "John Doe"})
382+
with pytest.raises(TypeError, match="multiple values for 'keep_versions'"):
383+
dt.vacuum(None, True, True, None, commit, False, [1], keep_versions=[2])

0 commit comments

Comments
 (0)