apache · LuciferYang · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
@@ -842,7 +842,7 @@ jobs:
         python-version: '3.12'
     - name: Install dependencies for Python CodeGen check
       run: |
-        python3.12 -m pip install 'black==23.12.1' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
+        python3.12 -m pip install 'black==26.3.1' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
         python3.12 -m pip list
     - name: Python CodeGen check for branch-3.5
       if: inputs.branch == 'branch-3.5'
@@ -944,7 +944,7 @@ jobs:
       run: |
         # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638
         # Should delete this section after SPARK 3.5 EOL.
-        python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+        python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==26.3.1'
         python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
     - name: List Python packages
       shell: 'script -q -e -c "bash {0}"'

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
@@ -62,7 +62,7 @@ jobs:
         run: |
          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
             ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
-            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
+            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==26.3.1' \
             'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
       - name: Install Ruby for documentation generation

diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
@@ -61,7 +61,7 @@ RUN python3.10 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13'
     sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
     ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas \
     'plotly>=4.8' 'docutils<0.18.0' 'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' \
-    'pytest-mypy-plugins==1.9.3' 'black==23.12.1' 'pandas-stubs==1.2.0.53' \
+    'pytest-mypy-plugins==1.9.3' 'black==26.3.1' 'pandas-stubs==1.2.0.53' \
     'grpcio==1.76.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' \
     'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' \

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
@@ -345,7 +345,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
         # In this case, if the PR is committed to the master branch and the release branch, we
         # only consider the release branch to be the fix version. E.g. it is not valid to have
         # both 1.1.0 and 1.0.0 as fix versions.
-        (major, minor, patch) = v.split(".")
+        major, minor, patch = v.split(".")
         if patch == "0":
             previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
             if previous in default_fix_versions:
@@ -737,7 +737,7 @@ def main():
 if __name__ == "__main__":
     import doctest
 
-    (failure_count, test_count) = doctest.testmod()
+    failure_count, test_count = doctest.testmod()
     if failure_count:
         sys.exit(-1)
     try:

diff --git a/dev/reformat-python b/dev/reformat-python
@@ -22,7 +22,7 @@ FWDIR="$( cd "$DIR"/.. && pwd )"
 cd "$FWDIR"
 
 BLACK_BUILD="${PYTHON_EXECUTABLE} -m black"
-BLACK_VERSION="23.12.1"
+BLACK_VERSION="26.3.1"
 $PYTHON_EXECUTABLE -c 'import black' 2> /dev/null
 if [ $? -ne 0 ]; then
     echo "The Python library providing the 'black' module was not found. Please install Black, for example, via 'pip install black==$BLACK_VERSION'."

diff --git a/dev/requirements.txt b/dev/requirements.txt
@@ -59,7 +59,7 @@ jira>=3.5.2
 PyGithub
 
 # pandas API on Spark Code formatter.
-black==23.12.1
+black==26.3.1
 py
 
 # Spark Connect (required)

diff --git a/dev/spark-test-image/connect-gen-protos/Dockerfile b/dev/spark-test-image/connect-gen-protos/Dockerfile
@@ -53,7 +53,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 RUN python3.12 -m pip install \
     'mypy==1.19.1' \
     'mypy-protobuf==3.3.0' \
-    'black==23.12.1'
+    'black==26.3.1'
 
 # Mount the Spark repo at /spark
 WORKDIR /spark

diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
@@ -89,7 +89,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # See 'docutils<0.18.0' in SPARK-39421
 RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe \
   ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
-  'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
+  'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==26.3.1' \
   'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
   'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
   && python3.12 -m pip cache purge
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
@@ -77,7 +77,7 @@ RUN python3.12 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 RUN python3.12 -m pip install \
-    'black==23.12.1' \
+    'black==26.3.1' \
     'flake8==3.9.0' \
     'ruff==0.14.8' \
     'googleapis-common-protos-stubs==2.2.0' \

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -253,9 +253,9 @@ def __hash__(self):
     sbt_test_goals=[
         "catalyst/test",
     ],
-    environ=None
-    if "GITHUB_ACTIONS" not in os.environ
-    else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
+    environ=(
+        None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
+    ),
 )
 
 sql = Module(
@@ -268,9 +268,9 @@ def __hash__(self):
     sbt_test_goals=[
         "sql/test",
     ],
-    environ=None
-    if "GITHUB_ACTIONS" not in os.environ
-    else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
+    environ=(
+        None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
+    ),
 )
 
 hive = Module(
@@ -1688,9 +1688,9 @@ def __hash__(self):
     build_profile_flags=["-Pdocker-integration-tests"],
     source_file_regexes=["connector/docker-integration-tests"],
     sbt_test_goals=["docker-integration-tests/test"],
-    environ=None
-    if "GITHUB_ACTIONS" not in os.environ
-    else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"},
+    environ=(
+        None if "GITHUB_ACTIONS" not in os.environ else {"ENABLE_DOCKER_INTEGRATION_TESTS": "1"}
+    ),
     test_tags=["org.apache.spark.tags.DockerTest"],
 )
 

diff --git a/dev/sparktestsupport/toposort.py b/dev/sparktestsupport/toposort.py
@@ -34,7 +34,6 @@
 
 from functools import reduce as _reduce
 
-
 __all__ = ["toposort", "toposort_flatten"]
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -64,7 +64,7 @@ ignore = [
 [tool.black]
 # When changing the version, we have to update
 # GitHub workflow version and dev/reformat-python
-required-version = "23.12.1"
+required-version = "26.3.1"
 line-length = 100
 target-version = ['py39']
 include = '\.pyi?$'

diff --git a/python/benchmarks/bench_eval_type.py b/python/benchmarks/bench_eval_type.py
@@ -50,7 +50,6 @@
 from pyspark.util import PythonEvalType
 from pyspark.worker import main as worker_main
 
-
 # ---------------------------------------------------------------------------
 # Wire-format helpers
 # ---------------------------------------------------------------------------

diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
@@ -63,7 +63,6 @@ class SpecialAccumulatorIds:
 
 
 class Accumulator(Generic[T]):
-
     """
     A shared variable that can be accumulated, i.e., has a commutative and associative "add"
     operation. Worker tasks on a Spark cluster can add values to an Accumulator with the `+=`
@@ -186,7 +185,6 @@ def __repr__(self) -> str:
 
 
 class AccumulatorParam(Generic[T]):
-
     """
     Helper object that defines how to accumulate values of a given type.
 
@@ -229,7 +227,6 @@ def addInPlace(self, value1: T, value2: T) -> T:
 
 
 class AddingAccumulatorParam(AccumulatorParam[U]):
-
     """
     An AccumulatorParam that uses the + operators to add values. Designed for simple types
     such as integers, floats, and lists. Requires the zero value for the underlying type
@@ -254,7 +251,6 @@ def addInPlace(self, value1: U, value2: U) -> U:
 
 
 class UpdateRequestHandler(socketserver.StreamRequestHandler):
-
     """
     This handler will keep polling updates from the same socket until the
     server is shutdown.
@@ -299,7 +295,7 @@ def poll(func: Callable[[], bool]) -> None:
         def accum_updates() -> bool:
             num_updates = read_int(self.rfile)
             for _ in range(num_updates):
-                (aid, update) = pickleSer._read_with_length(self.rfile)
+                aid, update = pickleSer._read_with_length(self.rfile)
                 _accumulatorRegistry[aid] += update
             # Write a byte in acknowledgement
             self.wfile.write(struct.pack("!b", 1))
@@ -406,7 +402,7 @@ def _start_update_server(
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
     globs["sc"] = SparkContext("local", "test")
-    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
     globs["sc"].stop()
     if failure_count:
         sys.exit(-1)
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
@@ -172,12 +172,10 @@ def setSparkHome(self, value: str) -> "SparkConf":
         return self
 
     @overload
-    def setExecutorEnv(self, key: str, value: str) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, key: str, value: str) -> "SparkConf": ...
 
     @overload
-    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf": ...
 
     def setExecutorEnv(
         self,
@@ -212,16 +210,13 @@ def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
         return self
 
     @overload
-    def get(self, key: str) -> Optional[str]:
-        ...
+    def get(self, key: str) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: None) -> Optional[str]:
-        ...
+    def get(self, key: str, defaultValue: None) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: str) -> str:
-        ...
+    def get(self, key: str, defaultValue: str) -> str: ...
 
     def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
         """Get the configured value for some key, or return a default otherwise."""
@@ -273,7 +268,7 @@ def toDebugString(self) -> str:
 def _test() -> None:
     import doctest
 
-    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(optionflags=doctest.ELLIPSIS)
     if failure_count:
         sys.exit(-1)
 

diff --git a/python/pyspark/core/broadcast.py b/python/pyspark/core/broadcast.py
@@ -68,7 +68,6 @@ def _from_id(bid: int) -> "Broadcast[Any]":
 
 
 class Broadcast(Generic[T]):
-
     """
     A broadcast variable created with :meth:`SparkContext.broadcast`.
     Access its value through :attr:`value`.
@@ -91,16 +90,13 @@ def __init__(
         sc: "SparkContext",
         value: T,
         pickle_registry: "BroadcastPickleRegistry",
-    ):
-        ...
+    ): ...
 
     @overload  # On worker without decryption server
-    def __init__(self: "Broadcast[Any]", *, path: str):
-        ...
+    def __init__(self: "Broadcast[Any]", *, path: str): ...
 
     @overload  # On worker with decryption server
-    def __init__(self: "Broadcast[Any]", *, sock_file: str):
-        ...
+    def __init__(self: "Broadcast[Any]", *, sock_file: str): ...
 
     def __init__(  # type: ignore[misc]
         self,
@@ -126,7 +122,7 @@ def __init__(  # type: ignore[misc]
                 # with encryption, we ask the jvm to do the encryption for us, we send it data
                 # over a socket
                 conn_info, auth_secret = self._python_broadcast.setupEncryptionServer()
-                (encryption_sock_file, _) = local_connect_and_auth(conn_info, auth_secret)
+                encryption_sock_file, _ = local_connect_and_auth(conn_info, auth_secret)
                 broadcast_out = ChunkedStream(encryption_sock_file, 8192)
             else:
                 # no encryption, we can just write pickled data directly to the file from python
@@ -271,7 +267,7 @@ def value(self) -> T:
             # if its on the driver, since executor decryption is handled already
             if self._sc is not None and self._sc._encryption_enabled:
                 conn_info, auth_secret = self._python_broadcast.setupDecryptionServer()
-                (decrypted_sock_file, _) = local_connect_and_auth(conn_info, auth_secret)
+                decrypted_sock_file, _ = local_connect_and_auth(conn_info, auth_secret)
                 self._python_broadcast.waitTillBroadcastDataSent()
                 return self.load(decrypted_sock_file)
             else:
@@ -372,7 +368,7 @@ def _test() -> None:
     spark = SparkSession.builder.master("local[4]").appName("broadcast tests").getOrCreate()
     globs["spark"] = spark
 
-    (failure_count, test_count) = doctest.testmod(pyspark.core.broadcast, globs=globs)
+    failure_count, test_count = doctest.testmod(pyspark.core.broadcast, globs=globs)
     spark.stop()
     if failure_count:
         sys.exit(-1)

diff --git a/python/pyspark/core/context.py b/python/pyspark/core/context.py
@@ -95,7 +95,6 @@
 
 
 class SparkContext:
-
     """
     Main entry point for Spark functionality. A SparkContext represents the
     connection to a Spark cluster, and can be used to create :class:`RDD` and
@@ -162,9 +161,9 @@ class SparkContext:
     _next_accum_id = 0
     _active_spark_context: ClassVar[Optional["SparkContext"]] = None
     _lock = RLock()
-    _python_includes: Optional[
-        List[str]
-    ] = None  # zip and egg files that need to be added to PYTHONPATH
+    _python_includes: Optional[List[str]] = (
+        None  # zip and egg files that need to be added to PYTHONPATH
+    )
     serializer: Serializer
     profiler_collector: ProfilerCollector
 
@@ -327,7 +326,7 @@ def _do_init(
                 self._accumulatorServer.server_address
             )
         else:
-            (host, port) = self._accumulatorServer.server_address  # type: ignore[misc]
+            host, port = self._accumulatorServer.server_address  # type: ignore[misc]
             self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port, auth_token)
         self._jsc.sc().register(self._javaAccumulator)
 
@@ -362,7 +361,7 @@ def _do_init(
         # with SparkContext.addFile, so we just need to add them to the PYTHONPATH
         for path in self._conf.get("spark.submit.pyFiles", "").split(","):
             if path != "":
-                (dirname, filename) = os.path.split(path)
+                dirname, filename = os.path.split(path)
                 try:
                     filepath = os.path.join(SparkFiles.getRootDirectory(), filename)
                     if not os.path.exists(filepath):
@@ -438,9 +437,7 @@ def _repr_html_(self) -> str:
                 <dd><code>{sc.appName}</code></dd>
             </dl>
         </div>
-        """.format(
-            sc=self
-        )
+        """.format(sc=self)
 
     def _initialize_context(self, jconf: JavaObject) -> JavaObject:
         """
@@ -901,7 +898,7 @@ def _serialize_to_jvm(
         if self._encryption_enabled:
             # with encryption, we open a server in java and send the data directly
             server = server_func()
-            (sock_file, _) = local_connect_and_auth(server.connInfo(), server.secret())
+            sock_file, _ = local_connect_and_auth(server.connInfo(), server.secret())
             chunked_out = ChunkedStream(sock_file, 8192)
             serializer.dump_stream(data, chunked_out)
             chunked_out.close()
@@ -1987,7 +1984,7 @@ def addPyFile(self, path: str) -> None:
         A path can be added only once. Subsequent additions of the same path are ignored.
         """
         self.addFile(path)
-        (dirname, filename) = os.path.split(path)  # dirname may be directory or HDFS/S3 prefix
+        dirname, filename = os.path.split(path)  # dirname may be directory or HDFS/S3 prefix
         if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
             assert self._python_includes is not None
             self._python_includes.append(filename)
@@ -2663,7 +2660,7 @@ def _test() -> None:
     globs = globals().copy()
     conf = SparkConf().set("spark.ui.enabled", "True")
     globs["sc"] = SparkContext("local[4]", "context tests", conf=conf)
-    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
     globs["sc"].stop()
     if failure_count:
         sys.exit(-1)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,7 +34,6 @@

		from functools import reduce as _reduce


		__all__ = ["toposort", "toposort_flatten"]


Expand Down