Skip to content

Commit 9f631b5

Browse files
smeyerre authored and ion-elgreco committed
fix: handle checking partition filters in array/list when converting to pyarrow dataset
Signed-off-by: Sam Meyer-Reed <smeyerreed@gmail.com>
1 parent 4c44133 commit 9f631b5

2 files changed

Lines changed: 24 additions & 5 deletions

File tree

python/deltalake/table.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -883,11 +883,7 @@ def to_pyarrow_dataset(
883883
self.schema().to_arrow(as_large_types=as_large_types)
884884
)
885885

886-
if partitions:
887-
partitions = [
888-
(column, operator, encode_partition_value(value))
889-
for column, operator, value in partitions
890-
]
886+
partitions = self._stringify_partition_values(partitions)
891887

892888
fragments = [
893889
format.make_fragment(

python/tests/test_table_read.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,29 @@ def test_read_partitioned_table_with_primitive_type_partition_filters():
291291
assert len(result_string["id"]) == 8
292292
assert all(category == "A" for category in result_string["category"])
293293

294+
partitions_bool_in = [("is_active", "in", [True, False])]
295+
result_bool_in = dt.to_pyarrow_dataset(partitions_bool_in).to_table().to_pydict()
296+
total_rows = len(dt.to_pyarrow_dataset().to_table().to_pydict()["id"])
297+
assert len(result_bool_in["id"]) == total_rows
298+
299+
partitions_year_in = [("year", "in", [2020, 2022.0])]
300+
result_year_in = dt.to_pyarrow_dataset(partitions_year_in).to_table().to_pydict()
301+
assert len(result_year_in["id"]) == 8
302+
assert all(year == "2020" for year in result_year_in["year"])
303+
304+
partitions_bool_true_only = [("is_active", "in", [True])]
305+
result_bool_true_only = dt.to_pyarrow_dataset(partitions_bool_true_only).to_table().to_pydict()
306+
assert len(result_bool_true_only["id"]) == 8
307+
assert all(is_active == "true" for is_active in result_bool_true_only["is_active"])
308+
309+
with pytest.raises(ValueError, match="Could not encode partition value for type"):
310+
partitions_invalid = [("category", "=", {"invalid": "dict"})]
311+
dt.to_pyarrow_dataset(partitions_invalid)
312+
313+
with pytest.raises(ValueError, match="Could not encode partition value for type"):
314+
partitions_invalid_list = [("category", "in", [{"invalid": "dict"}, "A"])]
315+
dt.to_pyarrow_dataset(partitions_invalid_list)
316+
294317

295318
@pytest.mark.pyarrow
296319
def test_read_empty_delta_table_after_delete():

0 commit comments

Comments
 (0)