Skip to content

Commit 4a7df41

Browse files
committed
Merge branch 'main' into arrays_zip
2 parents c3e2b2f + 87291f4 commit 4a7df41

173 files changed

Lines changed: 1745 additions & 609 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.claude/skills/review-comet-pr/SKILL.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,6 @@ Categories include: `aggregate/`, `array/`, `string/`, `math/`, `struct/`, `map/
149149
**SQL file structure:**
150150

151151
```sql
152-
-- ConfigMatrix: parquet.enable.dictionary=false,true
153-
154152
-- Create test data
155153
statement
156154
CREATE TABLE test_crc32(col string, a int, b float) USING parquet

.github/workflows/codeql.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ jobs:
4545
persist-credentials: false
4646

4747
- name: Initialize CodeQL
48-
uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4
48+
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4
4949
with:
5050
languages: actions
5151

5252
- name: Perform CodeQL Analysis
53-
uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4
53+
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4
5454
with:
5555
category: "/language:actions"

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,31 +222,28 @@ object CometConf extends ShimCometConf {
222222

223223
val COMET_CONVERT_FROM_PARQUET_ENABLED: ConfigEntry[Boolean] =
224224
conf("spark.comet.convert.parquet.enabled")
225-
.category(CATEGORY_TESTING)
225+
.category(CATEGORY_EXEC)
226226
.doc(
227227
"When enabled, data from Spark (non-native) Parquet v1 and v2 scans will be converted to " +
228-
"Arrow format. This is an experimental feature and has known issues with " +
229-
"non-UTC timezones.")
228+
"Arrow format.")
230229
.booleanConf
231230
.createWithDefault(false)
232231

233232
val COMET_CONVERT_FROM_JSON_ENABLED: ConfigEntry[Boolean] =
234233
conf("spark.comet.convert.json.enabled")
235-
.category(CATEGORY_TESTING)
234+
.category(CATEGORY_EXEC)
236235
.doc(
237236
"When enabled, data from Spark (non-native) JSON v1 and v2 scans will be converted to " +
238-
"Arrow format. This is an experimental feature and has known issues with " +
239-
"non-UTC timezones.")
237+
"Arrow format.")
240238
.booleanConf
241239
.createWithDefault(false)
242240

243241
val COMET_CONVERT_FROM_CSV_ENABLED: ConfigEntry[Boolean] =
244242
conf("spark.comet.convert.csv.enabled")
245-
.category(CATEGORY_TESTING)
243+
.category(CATEGORY_EXEC)
246244
.doc(
247245
"When enabled, data from Spark (non-native) CSV v1 and v2 scans will be converted to " +
248-
"Arrow format. This is an experimental feature and has known issues with " +
249-
"non-UTC timezones.")
246+
"Arrow format.")
250247
.booleanConf
251248
.createWithDefault(false)
252249

@@ -743,17 +740,17 @@ object CometConf extends ShimCometConf {
743740

744741
val COMET_SPARK_TO_ARROW_ENABLED: ConfigEntry[Boolean] =
745742
conf("spark.comet.sparkToColumnar.enabled")
746-
.category(CATEGORY_TESTING)
743+
.category(CATEGORY_EXEC)
747744
.doc("Whether to enable Spark to Arrow columnar conversion. When this is turned on, " +
748745
"Comet will convert operators in " +
749746
"`spark.comet.sparkToColumnar.supportedOperatorList` into Arrow columnar format before " +
750-
"processing. This is an experimental feature and has known issues with non-UTC timezones.")
747+
"processing.")
751748
.booleanConf
752749
.createWithDefault(false)
753750

754751
val COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST: ConfigEntry[Seq[String]] =
755752
conf("spark.comet.sparkToColumnar.supportedOperatorList")
756-
.category(CATEGORY_TESTING)
753+
.category(CATEGORY_EXEC)
757754
.doc("A comma-separated list of operators that will be converted to Arrow columnar " +
758755
s"format when `${COMET_SPARK_TO_ARROW_ENABLED.key}` is true.")
759756
.stringConf

dev/changelog/0.14.1.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion Comet 0.14.1 Changelog
21+
22+
This release consists of 5 commits from 1 contributor. See credits at the end of this changelog for more information.
23+
24+
**Fixed bugs:**
25+
26+
- fix: [branch-0.14] backport #3802 - cache object stores and bucket regions to reduce DNS query volume [#3935](https://github.com/apache/datafusion-comet/pull/3935) (andygrove)
27+
- fix: [branch-0.14] backport #3924 - share unified memory pools across native execution contexts [#3938](https://github.com/apache/datafusion-comet/pull/3938) (andygrove)
28+
- fix: [branch-0.14] backport #3879 - skip Comet columnar shuffle for stages with DPP scans [#3934](https://github.com/apache/datafusion-comet/pull/3934) (andygrove)
29+
- fix: [branch-0.14] backport #3914 - use min instead of max when capping write buffer size to Int range [#3936](https://github.com/apache/datafusion-comet/pull/3936) (andygrove)
30+
- fix: [branch-0.14] backport #3865 - handle ambiguous and non-existent local times [#3937](https://github.com/apache/datafusion-comet/pull/3937) (andygrove)
31+
32+
## Credits
33+
34+
Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor.
35+
36+
```
37+
5 Andy Grove
38+
```
39+
40+
Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release.

dev/release/generate-changelog.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,18 @@ def generate_changelog(repo, repo_name, tag1, tag2, version):
142142
print("Thank you also to everyone who contributed in other ways such as filing issues, reviewing "
143143
"PRs, and providing feedback on this release.\n")
144144

145+
def resolve_ref(ref):
146+
"""Resolve a git ref (e.g. HEAD, branch name) to a full commit SHA."""
147+
try:
148+
return subprocess.check_output(
149+
["git", "rev-parse", ref], text=True
150+
).strip()
151+
except subprocess.CalledProcessError:
152+
# If it can't be resolved locally, return as-is (e.g. a tag name
153+
# that the GitHub API can resolve)
154+
return ref
155+
156+
145157
def cli(args=None):
146158
"""Process command line arguments."""
147159
if not args:
@@ -153,12 +165,18 @@ def cli(args=None):
153165
parser.add_argument("version", help="The version number to include in the changelog")
154166
args = parser.parse_args()
155167

168+
# Resolve refs to SHAs so the GitHub API compares the same commits
169+
# as the local git log. Without this, refs like HEAD get resolved by
170+
# the GitHub API to the default branch instead of the current branch.
171+
tag1 = resolve_ref(args.tag1)
172+
tag2 = resolve_ref(args.tag2)
173+
156174
token = os.getenv("GITHUB_TOKEN")
157175
project = "apache/datafusion-comet"
158176

159177
g = Github(token)
160178
repo = g.get_repo(project)
161-
generate_changelog(repo, project, args.tag1, args.tag2, args.version)
179+
generate_changelog(repo, project, tag1, tag2, args.version)
162180

163181
if __name__ == "__main__":
164182
cli()

dev/release/rat_exclude_files.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ spark/src/test/resources/tpcds-query-results/*.out
2424
spark/src/test/resources/tpcds-micro-benchmarks/*.sql
2525
spark/src/test/resources/tpcds-plan-stability/approved-plans*/**/explain.txt
2626
spark/src/test/resources/tpcds-plan-stability/approved-plans*/**/simplified.txt
27+
spark/src/test/resources/tpcds-plan-stability/approved-plans*/**/extended.txt
2728
spark/src/test/resources/tpch-query-results/*.out
2829
spark/src/test/resources/tpch-extended/q*.sql
2930
spark/src/test/resources/test-data/*.csv

docs/source/contributor-guide/adding_a_new_expression.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,6 @@ It is important to verify that the new expression is correctly recognized by the
217217
Create a `.sql` file under the appropriate subdirectory in `spark/src/test/resources/sql-tests/expressions/` (e.g., `string/`, `math/`, `array/`). The file should create a table with test data, then run queries that exercise the expression. Here is an example for the `unhex` expression:
218218

219219
```sql
220-
-- ConfigMatrix: parquet.enable.dictionary=false,true
221-
222220
statement
223221
CREATE TABLE test_unhex(col string) USING parquet
224222

docs/source/contributor-guide/release_process.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,13 @@ Creating Nexus staging repository
302302
In the Nexus repository UI (https://repository.apache.org/) locate and verify the artifacts in
303303
staging (https://central.sonatype.org/publish/release/#locate-and-examine-your-staging-repository).
304304

305-
If the artifacts appear to be correct, then close and release the repository so it is made visible (this should
306-
actually happen automatically when running the script).
305+
The script closes the staging repository but does not release it. Releasing to Maven Central is a manual step
306+
performed only after the vote passes (see [Publishing Maven Artifacts](#publishing-maven-artifacts) below).
307+
308+
Note that the Maven artifacts are always published under the final release version (e.g. `0.13.0`), not the RC
309+
version — the `-rc1` / `-rc2` suffix only appears in the git tag and the source tarball in SVN. Because the script
310+
creates a new staging repository on each run, re-staging the same version for a subsequent RC is supported as long
311+
as no staging repository for that version has been released to Maven Central.
307312

308313
### Create the Release Candidate Tarball
309314

@@ -345,6 +350,13 @@ If the vote does not pass, address the issues raised, increment the release cand
345350
the [Tag the Release Candidate](#tag-the-release-candidate) step. For example, the next attempt would be tagged
346351
`0.13.0-rc2`.
347352

353+
Before staging the next RC, drop the previous RC's staging repository in the
354+
[Nexus UI](https://repository.apache.org/#stagingRepositories) by selecting it and clicking "Drop". This avoids
355+
leaving multiple closed staging repositories for the same version and prevents accidentally releasing the wrong
356+
one when the vote eventually passes. The Maven version (e.g. `0.13.0`) is shared across all RCs, so each run of
357+
`publish-to-maven.sh` creates a new staging repository for the same GAV — only one of them should ever be
358+
released to Maven Central.
359+
348360
## Publishing Binary Releases
349361

350362
Once the vote passes, we can publish the source and binary releases.

docs/source/contributor-guide/roadmap.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ helpful to have a roadmap for some of the major items that require coordination
2626

2727
### Iceberg Integration
2828

29-
Iceberg tables reads are now fully native, powered by a scan operator backed by Iceberg-rust ([#2528]). We anticipate
30-
major improvements expected in the next few releases, including bringing Iceberg table format V3 features (_e.g._,
29+
Reads of Iceberg tables with Parquet data files are fully native and enabled by default, powered by a scan operator
30+
backed by Iceberg-rust ([#2528]). We anticipate major improvements in the next few releases, including bringing Iceberg table format V3 features (_e.g._,
3131
encryption) to the reader.
3232

3333
[#2528]: https://github.com/apache/datafusion-comet/pull/2528

docs/source/contributor-guide/sql-file-tests.md

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,6 @@ A test file consists of SQL comments, directives, statements, and queries separa
7676
lines. Here is a minimal example:
7777

7878
```sql
79-
-- ConfigMatrix: parquet.enable.dictionary=false,true
80-
8179
statement
8280
CREATE TABLE test_abs(v double) USING parquet
8381

@@ -106,16 +104,19 @@ Runs the entire file once per combination of values. Multiple `ConfigMatrix` lin
106104
cross product of all combinations.
107105

108106
```sql
109-
-- ConfigMatrix: parquet.enable.dictionary=false,true
107+
-- ConfigMatrix: spark.sql.optimizer.inSetConversionThreshold=100,0
110108
```
111109

112110
This generates two test cases:
113111

114112
```
115-
sql-file: expressions/cast/cast.sql [parquet.enable.dictionary=false]
116-
sql-file: expressions/cast/cast.sql [parquet.enable.dictionary=true]
113+
sql-file: expressions/conditional/in_set.sql [spark.sql.optimizer.inSetConversionThreshold=100]
114+
sql-file: expressions/conditional/in_set.sql [spark.sql.optimizer.inSetConversionThreshold=0]
117115
```
118116

117+
Only add a `ConfigMatrix` directive when there is a real reason to run the test under
118+
multiple configurations. Do not add `ConfigMatrix` directives speculatively.
119+
119120
#### `MinSparkVersion`
120121

121122
Skips the file when running on a Spark version older than the specified version.
@@ -223,12 +224,9 @@ SELECT array(1, 2, 3)[10]
223224

224225
2. Add the Apache license header as a SQL comment.
225226

226-
3. Add a `ConfigMatrix` directive if the test should run with multiple Parquet configurations.
227-
Most expression tests use:
228-
229-
```sql
230-
-- ConfigMatrix: parquet.enable.dictionary=false,true
231-
```
227+
3. Add a `ConfigMatrix` directive only if the test needs to run under multiple configurations
228+
(e.g., testing behavior that varies with a specific Spark config). Do not add `ConfigMatrix`
229+
directives speculatively.
232230

233231
4. Create tables and insert test data using `statement` blocks. Include edge cases such as
234232
`NULL`, boundary values, and negative numbers.

0 commit comments

Comments
 (0)