Skip to content

Commit c89c961

Browse files
andygrove and claude committed
fix: unignore 5 Parquet schema validation tests in 3.5.8.diff
Remove IgnoreCometNativeDataFusion annotations for tests that are now passing with the schema validation fix for #3311:

- ParquetIOSuite: SPARK-35640 read binary as timestamp
- ParquetIOSuite: SPARK-35640 int as long
- ParquetQuerySuite: SPARK-36182 can't read TimestampLTZ as TimestampNTZ
- ParquetQuerySuite: row group skipping doesn't overflow
- ParquetFilterSuite: SPARK-25207 duplicate fields in case-insensitive mode

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 63b1c35 commit c89c961

1 file changed

Lines changed: 13 additions & 63 deletions

File tree

dev/diffs/3.5.8.diff

Lines changed: 13 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -2066,7 +2066,7 @@ index 07e2849ce6f..3e73645b638 100644
20662066
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
20672067
)
20682068
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
2069-
index 8e88049f51e..49f2001dc6b 100644
2069+
index 8e88049f51e..e21a5797996 100644
20702070
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
20712071
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
20722072
@@ -1095,7 +1095,11 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
@@ -2155,17 +2155,7 @@ index 8e88049f51e..49f2001dc6b 100644
21552155
val schema = StructType(Seq(
21562156
StructField("a", IntegerType, nullable = false)
21572157
))
2158-
@@ -1933,7 +1949,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2159-
}
2160-
}
2161-
2162-
- test("SPARK-25207: exception when duplicate fields in case-insensitive mode") {
2163-
+ test("SPARK-25207: exception when duplicate fields in case-insensitive mode",
2164-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
2165-
withTempPath { dir =>
2166-
val count = 10
2167-
val tableName = "spark_25207"
2168-
@@ -1984,7 +2001,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2158+
@@ -1984,7 +2000,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
21692159
}
21702160
}
21712161

@@ -2175,7 +2165,7 @@ index 8e88049f51e..49f2001dc6b 100644
21752165
// block 1:
21762166
// null count min max
21772167
// page-0 0 0 99
2178-
@@ -2044,7 +2062,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2168+
@@ -2044,7 +2061,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
21792169
}
21802170
}
21812171

@@ -2185,7 +2175,7 @@ index 8e88049f51e..49f2001dc6b 100644
21852175
withTempPath { dir =>
21862176
val path = dir.getCanonicalPath
21872177
spark.range(100).selectExpr("id * 2 AS id")
2188-
@@ -2276,7 +2295,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
2178+
@@ -2276,7 +2294,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
21892179
assert(pushedParquetFilters.exists(_.getClass === filterClass),
21902180
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
21912181

@@ -2198,7 +2188,7 @@ index 8e88049f51e..49f2001dc6b 100644
21982188
} else {
21992189
assert(selectedFilters.isEmpty, "There is filter pushed down")
22002190
}
2201-
@@ -2336,7 +2359,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
2191+
@@ -2336,7 +2358,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
22022192
assert(pushedParquetFilters.exists(_.getClass === filterClass),
22032193
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
22042194

@@ -2212,30 +2202,10 @@ index 8e88049f51e..49f2001dc6b 100644
22122202
case _ =>
22132203
throw new AnalysisException("Can not match ParquetTable in the query.")
22142204
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
2215-
index 8ed9ef1630e..f312174b182 100644
2205+
index 8ed9ef1630e..eed2a6f5ad5 100644
22162206
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
22172207
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
2218-
@@ -1064,7 +1064,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
2219-
}
2220-
}
2221-
2222-
- test("SPARK-35640: read binary as timestamp should throw schema incompatible error") {
2223-
+ test("SPARK-35640: read binary as timestamp should throw schema incompatible error",
2224-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
2225-
val data = (1 to 4).map(i => Tuple1(i.toString))
2226-
val readSchema = StructType(Seq(StructField("_1", DataTypes.TimestampType)))
2227-
2228-
@@ -1075,7 +1076,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
2229-
}
2230-
}
2231-
2232-
- test("SPARK-35640: int as long should throw schema incompatible error") {
2233-
+ test("SPARK-35640: int as long should throw schema incompatible error",
2234-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
2235-
val data = (1 to 4).map(i => Tuple1(i))
2236-
val readSchema = StructType(Seq(StructField("_1", DataTypes.LongType)))
2237-
2238-
@@ -1345,7 +1347,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
2208+
@@ -1345,7 +1345,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
22392209
}
22402210
}
22412211

@@ -2246,20 +2216,10 @@ index 8ed9ef1630e..f312174b182 100644
22462216
checkAnswer(
22472217
// "fruit" column in this file is encoded using DELTA_LENGTH_BYTE_ARRAY.
22482218
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
2249-
index f6472ba3d9d..ce39ebb52e6 100644
2219+
index f6472ba3d9d..18295e0b0f0 100644
22502220
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
22512221
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
2252-
@@ -185,7 +185,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2253-
}
2254-
}
2255-
2256-
- test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ") {
2257-
+ test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ",
2258-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
2259-
val data = (1 to 1000).map { i =>
2260-
val ts = new java.sql.Timestamp(i)
2261-
Row(ts)
2262-
@@ -998,7 +999,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2222+
@@ -998,7 +998,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
22632223
}
22642224
}
22652225

@@ -2269,7 +2229,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
22692229
withAllParquetReaders {
22702230
withTempPath { path =>
22712231
// Repeated values for dictionary encoding.
2272-
@@ -1051,7 +1053,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2232+
@@ -1051,7 +1052,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
22732233
testMigration(fromTsType = "TIMESTAMP_MICROS", toTsType = "INT96")
22742234
}
22752235

@@ -2279,7 +2239,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
22792239
def readParquet(schema: String, path: File): DataFrame = {
22802240
spark.read.schema(schema).parquet(path.toString)
22812241
}
2282-
@@ -1067,7 +1070,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2242+
@@ -1067,7 +1069,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
22832243
checkAnswer(readParquet(schema, path), df)
22842244
}
22852245

@@ -2289,7 +2249,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
22892249
val schema1 = "a DECIMAL(3, 2), b DECIMAL(18, 3), c DECIMAL(37, 3)"
22902250
checkAnswer(readParquet(schema1, path), df)
22912251
val schema2 = "a DECIMAL(3, 0), b DECIMAL(18, 1), c DECIMAL(37, 1)"
2292-
@@ -1089,7 +1093,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2252+
@@ -1089,7 +1092,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
22932253
val df = sql(s"SELECT 1 a, 123456 b, ${Int.MaxValue.toLong * 10} c, CAST('1.2' AS BINARY) d")
22942254
df.write.parquet(path.toString)
22952255

@@ -2299,17 +2259,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
22992259
checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
23002260
checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
23012261
checkAnswer(readParquet("b DECIMAL(11, 1)", path), sql("SELECT 123456.0"))
2302-
@@ -1133,7 +1138,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2303-
}
2304-
}
2305-
2306-
- test("row group skipping doesn't overflow when reading into larger type") {
2307-
+ test("row group skipping doesn't overflow when reading into larger type",
2308-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
2309-
withTempPath { path =>
2310-
Seq(0).toDF("a").write.parquet(path.toString)
2311-
// The vectorized and non-vectorized readers will produce different exceptions, we don't need
2312-
@@ -1148,7 +1154,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
2262+
@@ -1148,7 +1152,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
23132263
.where(s"a < ${Long.MaxValue}")
23142264
.collect()
23152265
}

0 commit comments

Comments
 (0)