Skip to content

Commit 2cb6142

Browse files
authored
test: re-enable sql_hive-1 for Spark 4.0 and fix two small failures (#4047)
1 parent 3be3a34 commit 2cb6142

3 files changed

Lines changed: 36 additions & 15 deletions

File tree

.github/workflows/spark_sql_test.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,6 @@ jobs:
141141
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'}
142142
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
143143
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
144-
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
145-
exclude:
146-
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
147-
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
148144
fail-fast: false
149145
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
150146
runs-on: ${{ matrix.os }}

dev/diffs/4.0.1.diff

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3830,16 +3830,44 @@ index 52abd248f3a..b4e096cae24 100644
38303830
case d: DynamicPruningExpression => d.child
38313831
}
38323832
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
3833-
index 4b27082e188..6710c90c789 100644
3833+
index 4b27082e188..057b2430872 100644
38343834
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
38353835
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
3836-
@@ -147,7 +147,9 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS
3836+
@@ -17,7 +17,7 @@
3837+
3838+
package org.apache.spark.sql.hive
3839+
3840+
-import org.apache.spark.sql.{QueryTest, Row}
3841+
+import org.apache.spark.sql.{IgnoreCometSuite, QueryTest, Row}
3842+
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
3843+
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
3844+
import org.apache.spark.sql.hive.test.TestHiveSingleton
3845+
@@ -26,7 +26,13 @@ import org.apache.spark.sql.types.{IntegerType, StringType}
3846+
import org.apache.spark.util.ArrayImplicits._
3847+
import org.apache.spark.util.Utils
3848+
3849+
-class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
3850+
+// Comet: mix in IgnoreCometSuite so these tests are reported as ignored when Comet is enabled
3851+
+// (ENABLE_COMET=true). The jar these tests depend on (`hive-test-udfs.jar`) is stripped from the
3852+
+// Spark 4.0.1 release source tag per the ASF binary-artifact policy, so the tests cannot run in
3853+
+// Comet's CI. Ignoring keeps the suite passing without masking real regressions; the upstream
3854+
+// tests still run in non-Comet Spark builds that ship the jar on branch-4.0.
3855+
+class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
3856+
+ with IgnoreCometSuite {
3857+
3858+
case class UDFTestInformation(
3859+
identifier: String,
3860+
@@ -147,7 +153,13 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS
38373861

38383862
// This jar file should not be placed to the classpath.
38393863
val jarPath = "src/test/noclasspath/hive-test-udfs.jar"
38403864
- assume(new java.io.File(jarPath).exists)
3841-
+ // Comet: hive-test-udfs.jar files has been removed from Apache Spark repository
3842-
+ // comment out the following line for now
3865+
+ // Comet: the upstream `assume(...)` runs here in the suite constructor (inside this foreach,
3866+
+ // before `test(...)` registers a case). When the jar is missing - as it is on the v4.0.1
3867+
+ // release tag - `assume` throws TestCanceledException out of `<init>`, which ScalaTest
3868+
+ // reports as a suite abort (not a per-test cancel) and fails the whole job. The
3869+
+ // IgnoreCometSuite mixin above already reroutes these tests to `ignore` under Comet, so
3870+
+ // the jar presence check is unnecessary; comment it out to avoid the constructor-time abort.
38433871
+ // assume(new java.io.File(jarPath).exists)
38443872
val jarUrl = s"file://${System.getProperty("user.dir")}/$jarPath"
38453873

spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919

2020
package org.apache.spark.sql.comet.shims
2121

22+
import java.io.FileNotFoundException
23+
2224
import scala.util.matching.Regex
2325

2426
import org.apache.spark.QueryContext
2527
import org.apache.spark.SparkException
26-
import org.apache.spark.SparkFileNotFoundException
2728
import org.apache.spark.sql.errors.QueryExecutionErrors
2829
import org.apache.spark.sql.types._
2930
import org.apache.spark.unsafe.types.UTF8String
@@ -292,17 +293,13 @@ trait ShimSparkErrorConverter {
292293

293294
case "FileNotFound" =>
294295
val msg = params("message").toString
295-
// Extract file path from native error message and format like Hadoop's
296-
// FileNotFoundException: "File <path> does not exist"
297296
val path = ShimSparkErrorConverter.ObjectLocationPattern
298297
.findFirstMatchIn(msg)
299298
.map(_.group(1))
300299
.getOrElse(msg)
301-
// readCurrentFileNotFoundError was removed in Spark 4.0; construct directly
302300
Some(
303-
new SparkFileNotFoundException(
304-
errorClass = "_LEGACY_ERROR_TEMP_2055",
305-
messageParameters = Map("message" -> s"File $path does not exist")))
301+
QueryExecutionErrors
302+
.fileNotExistError(path, new FileNotFoundException(s"File $path does not exist")))
306303

307304
case _ =>
308305
// Unknown error type - return None to trigger fallback

0 commit comments

Comments
 (0)