Skip to content

Commit 92e75bb

Browse files
committed
refactor: move from pyarrow to arro3
Signed-off-by: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com>
1 parent 2fe2ec5 commit 92e75bb

53 files changed

Lines changed: 4036 additions & 2668 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/retry_integration_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ MAX_RETRIES=$2
55
RETRY_DELAY=$3
66
ATTEMPT=1
77
run_command() {
8-
uv run --no-sync pytest -m "($TEST_NAME and integration)" --doctest-modules 2>&1
8+
uv run --no-sync pytest -m "($TEST_NAME and integration and pyarrow)" --doctest-modules 2>&1
99
}
1010
until [ $ATTEMPT -gt $MAX_RETRIES ]
1111
do

.github/workflows/python_build.yml

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333

3434
- name: Check Python
3535
run: |
36-
uv sync --only-group dev --no-install-project
36+
uv sync --no-install-project
3737
make check-python
3838
3939
- name: Check Rust
@@ -49,7 +49,7 @@ jobs:
4949
run: make unit-test
5050

5151
test:
52-
name: Python Build (Python 3.10 PyArrow latest)
52+
name: Python Build (Python 3.10 Optional latest pyarrow)
5353
runs-on: ubuntu-latest
5454
env:
5555
SCCACHE_GHA_ENABLED: "true"
@@ -70,13 +70,18 @@ jobs:
7070
run: make develop
7171

7272
- name: Run tests
73-
run: uv run --no-sync pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules
73+
run: uv run --no-sync pytest -m '((s3 or azure) and integration) or not integration and not benchmark and pyarrow' --doctest-modules
7474

7575
- name: Test without pandas
7676
run: |
7777
uv pip uninstall pandas
78-
uv run --no-sync pytest -m "not pandas and not integration and not benchmark"
79-
uv pip install pandas
78+
uv run --no-sync pytest -m "pyarrow and not pandas and not integration and not benchmark"
79+
80+
- name: Test without pyarrow and without pandas
81+
run: |
82+
uv pip uninstall pyarrow
83+
uv run --no-sync pytest -m "not pyarrow and not pandas and not integration and not benchmark and no_pyarrow"
84+
8085
8186
test-lakefs:
8287
name: Python Build (Python 3.10 LakeFS Integration tests)
@@ -189,4 +194,19 @@ jobs:
189194
190195
- name: Run deltalake
191196
run: |
192-
uv run python -c 'import deltalake'
197+
uv run --no-sync python -c 'import deltalake'
198+
199+
- name: Run deltalake without pyarrow
200+
run: |
201+
uv pip uninstall pyarrow
202+
uv run --no-sync python -c 'import deltalake'
203+
204+
- name: Run deltalake without pyarrow pandas
205+
run: |
206+
uv pip uninstall pyarrow pandas
207+
uv run --no-sync python -c 'import deltalake'
208+
209+
- name: Run deltalake without pandas
210+
run: |
211+
uv pip install pyarrow
212+
uv run --no-sync python -c 'import deltalake'

docs/usage/examining-table.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ the table will be loaded into.
5757
let schema = table.get_schema()?;
5858
println!("schema: {:?}", schema);
5959
```
60-
These schemas have a JSON representation that can be retrieved.
60+
These schemas have a JSON representation that can be retrieved.
6161

6262
=== "Python"
6363
To retrieve the JSON representation, use [DeltaTable.schema.to_json()][deltalake.schema.Schema.to_json].
@@ -73,10 +73,10 @@ These schemas have a JSON representation that can be retrieved.
7373
It is also possible to retrieve the Arrow schema:
7474
=== "Python"
7575

76-
Use [DeltaTable.schema.to_pyarrow()][deltalake.schema.Schema.to_pyarrow] to retrieve the PyArrow schema:
76+
Use [DeltaTable.schema.to_arrow()][deltalake.schema.Schema.to_arrow] to retrieve the Arro3 schema:
7777

7878
``` python
79-
>>> dt.schema().to_pyarrow()
79+
>>> dt.schema().to_arrow()
8080
id: int64
8181
```
8282
=== "Rust"
@@ -161,4 +161,4 @@ This works even with past versions of the table:
161161
table.load_version(0).await?;
162162
let actions = table.snapshot()?.add_actions_table(true)?;
163163
println!("{}", pretty_format_batches(&vec![actions])?);
164-
```
164+
```

docs/usage/querying-delta-tables.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ To load into Pandas or a PyArrow table use the `DeltaTable.to_pandas` and `Delta
1818
``` python
1919
>>> from deltalake import DeltaTable
2020
>>> dt = DeltaTable("../rust/tests/data/delta-0.8.0-partitioned")
21-
>>> dt.schema().to_pyarrow()
21+
>>> dt.schema().to_arrow()
2222
value: string
2323
year: string
2424
month: string
@@ -117,7 +117,7 @@ Dask Name: read-parquet, 6 tasks
117117
1 7 2021 12 20
118118
```
119119

120-
When working with the Rust API, Apache Datafusion can be used to query data from a delta table.
120+
When working with the Rust API, Apache Datafusion can be used to query data from a delta table.
121121

122122
```rust
123123
let table = deltalake::open_table("../rust/tests/data/delta-0.8.0-partitioned").await?;
@@ -134,4 +134,4 @@ let ctx = SessionContext::new();
134134
let dataframe = ctx.read_table( Arc::new(table.clone()))?;
135135
let df = dataframe.filter(col("year").eq(lit(2021)))?.select(vec![col("value")])?;
136136
df.show().await?;
137-
```
137+
```

python/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ doc = false
1717
[dependencies]
1818
delta_kernel.workspace = true
1919

20+
pyo3-arrow = { version = "*", default-features = false}
21+
2022
# arrow
2123
arrow-schema = { workspace = true, features = ["serde"] }
2224

python/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ test-cov: ## Create coverage report
6767

6868
.PHONY: test-pyspark
6969
test-pyspark:
70-
uv run --no-sync pytest -m 'pyspark and integration'
70+
uv run --no-sync pytest -m 'pyarrow and pyspark and integration'
7171

7272
.PHONY: build-documentation
7373
build-documentation: ## Build documentation with Sphinx

0 commit comments

Comments
 (0)