Skip to content

Commit c15e36b

Browse files
zhengruifengdongjoon-hyun
authored andcommitted
[SPARK-55705][PYTHON][INFRA] Upgrade PyArrow to 23
### What changes were proposed in this pull request? Upgrade PyArrow to 23 ### Why are the changes needed? refresh the images to test against latest pyarrow ### Does this PR introduce _any_ user-facing change? no, infra-only ### How was this patch tested? 1, for changes in lint/doc/python3-12, the PR builder should cover; 2, for other places, will monitor the scheduled jobs ### Was this patch authored or co-authored using generative AI tooling? no Closes #54504 from zhengruifeng/upgrade_pa_23. Authored-by: Ruifeng Zheng <ruifengz@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent baa8507 commit c15e36b

File tree

11 files changed

+11
-11
lines changed

11 files changed

+11
-11
lines changed

.github/workflows/python_hosted_runner_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ jobs:
154154
run: |
155155
python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
156156
python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
157-
python${{matrix.python}} -m pip install numpy 'pyarrow>=22.0.0' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
157+
python${{matrix.python}} -m pip install numpy 'pyarrow>=23.0.0' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
158158
python${{matrix.python}} -m pip install 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' 'zstandard==0.25.0' 'graphviz==0.20.3' && \
159159
python${{matrix.python}} -m pip cache purge
160160
- name: List Python packages

dev/spark-test-image/docs/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
8888
# See 'ipython_genutils' in SPARK-38517
8989
# See 'docutils<0.18.0' in SPARK-39421
9090
RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe \
91-
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
91+
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
9292
'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
9393
'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
9494
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \

dev/spark-test-image/lint/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ RUN python3.12 -m pip install \
9595
'pandas' \
9696
'pandas-stubs' \
9797
'plotly>=4.8' \
98-
'pyarrow>=22.0.0' \
98+
'pyarrow>=23.0.0' \
9999
'pytest-mypy-plugins==1.9.3' \
100100
'pytest==7.1.3' \
101101
'scipy>=1.8.0' \

dev/spark-test-image/python-310/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
6363
RUN python3.10 -m venv $VIRTUAL_ENV
6464
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6565

66-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
66+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
6767
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
6868

6969
RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \

dev/spark-test-image/python-311/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
6060
RUN python3.11 -m venv $VIRTUAL_ENV
6161
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6262

63-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
63+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
6464
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
6565

6666
RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \

dev/spark-test-image/python-312-classic-only/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
5656
RUN python3.12 -m venv $VIRTUAL_ENV
5757
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
5858

59-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 pandas==2.3.3 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
59+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 pandas==2.3.3 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
6060
ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
6161

6262
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12

dev/spark-test-image/python-312-pandas-3/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
5959
RUN python3.12 -m venv $VIRTUAL_ENV
6060
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6161

62-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas>=3 scipy plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
62+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas>=3 scipy plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
6363
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
6464

6565
RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \

dev/spark-test-image/python-312/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
5656
RUN python3.12 -m venv $VIRTUAL_ENV
5757
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
5858

59-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
59+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
6060
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
6161

6262
RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \

dev/spark-test-image/python-313/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
6060
RUN python3.13 -m venv $VIRTUAL_ENV
6161
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6262

63-
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
63+
ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
6464
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
6565

6666
RUN python3.13 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \

dev/spark-test-image/python-314-nogil/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,5 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6262

6363
# TODO: Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS when it supports Python 3.14 free threaded
6464
# TODO: Add lxml, grpcio, grpcio-status back when they support Python 3.14 free threaded
65-
RUN python3.14t -m pip install 'numpy>=2.1' 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.3.3' 'pystack>=1.6.0' scipy coverage matplotlib openpyxl jinja2 psutil && \
65+
RUN python3.14t -m pip install 'numpy>=2.1' 'pyarrow>=23.0.0' 'six==1.16.0' 'pandas==2.3.3' 'pystack>=1.6.0' scipy coverage matplotlib openpyxl jinja2 psutil && \
6666
python3.14t -m pip cache purge

0 commit comments

Comments
 (0)