Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ jobs:
python-version: '3.12'
- name: Install dependencies for Python CodeGen check
run: |
python3.12 -m pip install 'black==23.12.1' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.12 -m pip install 'black==26.3.1' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.12 -m pip list
- name: Python CodeGen check for branch-3.5
if: inputs.branch == 'branch-3.5'
Expand Down Expand Up @@ -944,7 +944,7 @@ jobs:
run: |
# SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638
# Should delete this section after SPARK 3.5 EOL.
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==26.3.1'
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: List Python packages
shell: 'script -q -e -c "bash {0}"'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
run: |
pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==26.3.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- name: Install Ruby for documentation generation
Expand Down
2 changes: 1 addition & 1 deletion dev/create-release/spark-rm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ RUN python3.10 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13'
sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas \
'plotly>=4.8' 'docutils<0.18.0' 'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' \
'pytest-mypy-plugins==1.9.3' 'black==23.12.1' 'pandas-stubs==1.2.0.53' \
'pytest-mypy-plugins==1.9.3' 'black==26.3.1' 'pandas-stubs==1.2.0.53' \
'grpcio==1.76.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' \
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' \
Expand Down
4 changes: 2 additions & 2 deletions dev/merge_spark_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
# In this case, if the PR is committed to the master branch and the release branch, we
# only consider the release branch to be the fix version. E.g. it is not valid to have
# both 1.1.0 and 1.0.0 as fix versions.
(major, minor, patch) = v.split(".")
major, minor, patch = v.split(".")
if patch == "0":
previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
if previous in default_fix_versions:
Expand Down Expand Up @@ -737,7 +737,7 @@ def main():
if __name__ == "__main__":
import doctest

(failure_count, test_count) = doctest.testmod()
failure_count, test_count = doctest.testmod()
if failure_count:
sys.exit(-1)
try:
Expand Down
2 changes: 1 addition & 1 deletion dev/reformat-python
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ FWDIR="$( cd "$DIR"/.. && pwd )"
cd "$FWDIR"

BLACK_BUILD="${PYTHON_EXECUTABLE} -m black"
BLACK_VERSION="23.12.1"
BLACK_VERSION="26.3.1"
$PYTHON_EXECUTABLE -c 'import black' 2> /dev/null
if [ $? -ne 0 ]; then
echo "The Python library providing the 'black' module was not found. Please install Black, for example, via 'pip install black==$BLACK_VERSION'."
Expand Down
2 changes: 1 addition & 1 deletion dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jira>=3.5.2
PyGithub

# pandas API on Spark Code formatter.
black==23.12.1
black==26.3.1
py

# Spark Connect (required)
Expand Down
2 changes: 1 addition & 1 deletion dev/spark-test-image/connect-gen-protos/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN python3.12 -m pip install \
'mypy==1.19.1' \
'mypy-protobuf==3.3.0' \
'black==23.12.1'
'black==26.3.1'

# Mount the Spark repo at /spark
WORKDIR /spark
Expand Down
2 changes: 1 addition & 1 deletion dev/spark-test-image/docs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# See 'docutils<0.18.0' in SPARK-39421
RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==26.3.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
&& python3.12 -m pip cache purge
2 changes: 1 addition & 1 deletion dev/spark-test-image/lint/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ RUN python3.12 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

RUN python3.12 -m pip install \
'black==23.12.1' \
'black==26.3.1' \
'flake8==3.9.0' \
'ruff==0.14.8' \
'googleapis-common-protos-stubs==2.2.0' \
Expand Down
18 changes: 9 additions & 9 deletions dev/sparktestsupport/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,9 @@ def __hash__(self):
sbt_test_goals=[
"catalyst/test",
],
environ=None
if "GITHUB_ACTIONS" not in os.environ
else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
environ=(
None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
),
)

sql = Module(
Expand All @@ -268,9 +268,9 @@ def __hash__(self):
sbt_test_goals=[
"sql/test",
],
environ=None
if "GITHUB_ACTIONS" not in os.environ
else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
environ=(
None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
),
)

hive = Module(
Expand Down Expand Up @@ -1688,9 +1688,9 @@ def __hash__(self):
build_profile_flags=["-Pdocker-integration-tests"],
source_file_regexes=["connector/docker-integration-tests"],
sbt_test_goals=["docker-integration-tests/test"],
environ=None
if "GITHUB_ACTIONS" not in os.environ
else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
environ=(
None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
),
test_tags=["org.apache.spark.tags.DockerTest"],
)

Expand Down
1 change: 0 additions & 1 deletion dev/sparktestsupport/toposort.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

from functools import reduce as _reduce


__all__ = ["toposort", "toposort_flatten"]


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ ignore = [
[tool.black]
# When changing the version, we have to update
# GitHub workflow version and dev/reformat-python
required-version = "23.12.1"
required-version = "26.3.1"
line-length = 100
target-version = ['py39']
include = '\.pyi?$'
Expand Down
1 change: 0 additions & 1 deletion python/benchmarks/bench_eval_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
from pyspark.util import PythonEvalType
from pyspark.worker import main as worker_main


# ---------------------------------------------------------------------------
# Wire-format helpers
# ---------------------------------------------------------------------------
Expand Down
8 changes: 2 additions & 6 deletions python/pyspark/accumulators.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ class SpecialAccumulatorIds:


class Accumulator(Generic[T]):

"""
A shared variable that can be accumulated, i.e., has a commutative and associative "add"
operation. Worker tasks on a Spark cluster can add values to an Accumulator with the `+=`
Expand Down Expand Up @@ -186,7 +185,6 @@ def __repr__(self) -> str:


class AccumulatorParam(Generic[T]):

"""
Helper object that defines how to accumulate values of a given type.

Expand Down Expand Up @@ -229,7 +227,6 @@ def addInPlace(self, value1: T, value2: T) -> T:


class AddingAccumulatorParam(AccumulatorParam[U]):

"""
An AccumulatorParam that uses the + operators to add values. Designed for simple types
such as integers, floats, and lists. Requires the zero value for the underlying type
Expand All @@ -254,7 +251,6 @@ def addInPlace(self, value1: U, value2: U) -> U:


class UpdateRequestHandler(socketserver.StreamRequestHandler):

"""
This handler will keep polling updates from the same socket until the
server is shutdown.
Expand Down Expand Up @@ -299,7 +295,7 @@ def poll(func: Callable[[], bool]) -> None:
def accum_updates() -> bool:
num_updates = read_int(self.rfile)
for _ in range(num_updates):
(aid, update) = pickleSer._read_with_length(self.rfile)
aid, update = pickleSer._read_with_length(self.rfile)
_accumulatorRegistry[aid] += update
# Write a byte in acknowledgement
self.wfile.write(struct.pack("!b", 1))
Expand Down Expand Up @@ -406,7 +402,7 @@ def _start_update_server(
# The small batch size here ensures that we see multiple batches,
# even in these small test examples:
globs["sc"] = SparkContext("local", "test")
(failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
globs["sc"].stop()
if failure_count:
sys.exit(-1)
17 changes: 6 additions & 11 deletions python/pyspark/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,10 @@ def setSparkHome(self, value: str) -> "SparkConf":
return self

@overload
def setExecutorEnv(self, key: str, value: str) -> "SparkConf":
...
def setExecutorEnv(self, key: str, value: str) -> "SparkConf": ...

@overload
def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf":
...
def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf": ...

def setExecutorEnv(
self,
Expand Down Expand Up @@ -212,16 +210,13 @@ def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
return self

@overload
def get(self, key: str) -> Optional[str]:
...
def get(self, key: str) -> Optional[str]: ...

@overload
def get(self, key: str, defaultValue: None) -> Optional[str]:
...
def get(self, key: str, defaultValue: None) -> Optional[str]: ...

@overload
def get(self, key: str, defaultValue: str) -> str:
...
def get(self, key: str, defaultValue: str) -> str: ...

def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
"""Get the configured value for some key, or return a default otherwise."""
Expand Down Expand Up @@ -273,7 +268,7 @@ def toDebugString(self) -> str:
def _test() -> None:
import doctest

(failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
failure_count, test_count = doctest.testmod(optionflags=doctest.ELLIPSIS)
if failure_count:
sys.exit(-1)

Expand Down
16 changes: 6 additions & 10 deletions python/pyspark/core/broadcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def _from_id(bid: int) -> "Broadcast[Any]":


class Broadcast(Generic[T]):

"""
A broadcast variable created with :meth:`SparkContext.broadcast`.
Access its value through :attr:`value`.
Expand All @@ -91,16 +90,13 @@ def __init__(
sc: "SparkContext",
value: T,
pickle_registry: "BroadcastPickleRegistry",
):
...
): ...

@overload # On worker without decryption server
def __init__(self: "Broadcast[Any]", *, path: str):
...
def __init__(self: "Broadcast[Any]", *, path: str): ...

@overload # On worker with decryption server
def __init__(self: "Broadcast[Any]", *, sock_file: str):
...
def __init__(self: "Broadcast[Any]", *, sock_file: str): ...

def __init__( # type: ignore[misc]
self,
Expand All @@ -126,7 +122,7 @@ def __init__( # type: ignore[misc]
# with encryption, we ask the jvm to do the encryption for us, we send it data
# over a socket
conn_info, auth_secret = self._python_broadcast.setupEncryptionServer()
(encryption_sock_file, _) = local_connect_and_auth(conn_info, auth_secret)
encryption_sock_file, _ = local_connect_and_auth(conn_info, auth_secret)
broadcast_out = ChunkedStream(encryption_sock_file, 8192)
else:
# no encryption, we can just write pickled data directly to the file from python
Expand Down Expand Up @@ -271,7 +267,7 @@ def value(self) -> T:
# if its on the driver, since executor decryption is handled already
if self._sc is not None and self._sc._encryption_enabled:
conn_info, auth_secret = self._python_broadcast.setupDecryptionServer()
(decrypted_sock_file, _) = local_connect_and_auth(conn_info, auth_secret)
decrypted_sock_file, _ = local_connect_and_auth(conn_info, auth_secret)
self._python_broadcast.waitTillBroadcastDataSent()
return self.load(decrypted_sock_file)
else:
Expand Down Expand Up @@ -372,7 +368,7 @@ def _test() -> None:
spark = SparkSession.builder.master("local[4]").appName("broadcast tests").getOrCreate()
globs["spark"] = spark

(failure_count, test_count) = doctest.testmod(pyspark.core.broadcast, globs=globs)
failure_count, test_count = doctest.testmod(pyspark.core.broadcast, globs=globs)
spark.stop()
if failure_count:
sys.exit(-1)
Expand Down
21 changes: 9 additions & 12 deletions python/pyspark/core/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@


class SparkContext:

"""
Main entry point for Spark functionality. A SparkContext represents the
connection to a Spark cluster, and can be used to create :class:`RDD` and
Expand Down Expand Up @@ -162,9 +161,9 @@ class SparkContext:
_next_accum_id = 0
_active_spark_context: ClassVar[Optional["SparkContext"]] = None
_lock = RLock()
_python_includes: Optional[
List[str]
] = None # zip and egg files that need to be added to PYTHONPATH
_python_includes: Optional[List[str]] = (
None # zip and egg files that need to be added to PYTHONPATH
)
serializer: Serializer
profiler_collector: ProfilerCollector

Expand Down Expand Up @@ -327,7 +326,7 @@ def _do_init(
self._accumulatorServer.server_address
)
else:
(host, port) = self._accumulatorServer.server_address # type: ignore[misc]
host, port = self._accumulatorServer.server_address # type: ignore[misc]
self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port, auth_token)
self._jsc.sc().register(self._javaAccumulator)

Expand Down Expand Up @@ -362,7 +361,7 @@ def _do_init(
# with SparkContext.addFile, so we just need to add them to the PYTHONPATH
for path in self._conf.get("spark.submit.pyFiles", "").split(","):
if path != "":
(dirname, filename) = os.path.split(path)
dirname, filename = os.path.split(path)
try:
filepath = os.path.join(SparkFiles.getRootDirectory(), filename)
if not os.path.exists(filepath):
Expand Down Expand Up @@ -438,9 +437,7 @@ def _repr_html_(self) -> str:
<dd><code>{sc.appName}</code></dd>
</dl>
</div>
""".format(
sc=self
)
""".format(sc=self)

def _initialize_context(self, jconf: JavaObject) -> JavaObject:
"""
Expand Down Expand Up @@ -901,7 +898,7 @@ def _serialize_to_jvm(
if self._encryption_enabled:
# with encryption, we open a server in java and send the data directly
server = server_func()
(sock_file, _) = local_connect_and_auth(server.connInfo(), server.secret())
sock_file, _ = local_connect_and_auth(server.connInfo(), server.secret())
chunked_out = ChunkedStream(sock_file, 8192)
serializer.dump_stream(data, chunked_out)
chunked_out.close()
Expand Down Expand Up @@ -1987,7 +1984,7 @@ def addPyFile(self, path: str) -> None:
A path can be added only once. Subsequent additions of the same path are ignored.
"""
self.addFile(path)
(dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix
dirname, filename = os.path.split(path) # dirname may be directory or HDFS/S3 prefix
if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
assert self._python_includes is not None
self._python_includes.append(filename)
Expand Down Expand Up @@ -2663,7 +2660,7 @@ def _test() -> None:
globs = globals().copy()
conf = SparkConf().set("spark.ui.enabled", "True")
globs["sc"] = SparkContext("local[4]", "context tests", conf=conf)
(failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
globs["sc"].stop()
if failure_count:
sys.exit(-1)
Expand Down
Loading