Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
sunchao committed Mar 6, 2024
1 parent 0d64757 commit 17d638f
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 25 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/spark_sql_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ jobs:
- name: Run Spark sql/core-3 tests
run: |
cd apache-spark
ENABLE_COMET=false build/sbt sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest
ENABLE_COMET=false build/sbt "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"
spark-sql-hive-1:
strategy:
Expand Down Expand Up @@ -180,6 +180,7 @@ jobs:
comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
- name: Run Spark sql/hive-1 tests
run: |
localedef -c -f UTF-8 -i en_US en_US.UTF-8
cd apache-spark
ENABLE_COMET=false build/sbt hive/test -Dtest.exclude.tags=org.apache.spark.tags.ExtendedHiveTest
Expand Down Expand Up @@ -209,6 +210,7 @@ jobs:
comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
- name: Run Spark sql/hive-2 tests
run: |
localedef -c -f UTF-8 -i en_US en_US.UTF-8
cd apache-spark
ENABLE_COMET=false build/sbt "hive/testOnly *.HiveSparkSubmitSuite *.VersionsSuite *.HiveDDLSuite *.HiveCatalogedDDLSuite *.HiveSerDeSuite *.HiveQuerySuite *.SQLQuerySuite"
98 changes: 74 additions & 24 deletions dev/diffs/3.4.2.diff
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,30 @@ index bd9c79e5b96..ab7584e768e 100644
}
assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size,
s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala
index 1d2e467c94c..77a119505b9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala
@@ -28,7 +28,7 @@ import org.apache.hadoop.fs.{FileStatus, FileSystem, GlobFilter, Path}
import org.mockito.Mockito.{mock, when}

import org.apache.spark.SparkException
-import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.{DataFrame, DisableCometSuite, QueryTest, Row}
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.execution.datasources.PartitionedFile
import org.apache.spark.sql.functions.col
@@ -38,7 +38,9 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils

-class BinaryFileFormatSuite extends QueryTest with SharedSparkSession {
+// For some reason this suite is flaky w/ or w/o Comet when running in Github workflow.
+// Since it isn't related to Comet, we disable it for now.
+class BinaryFileFormatSuite extends QueryTest with SharedSparkSession with DisableCometSuite {
import BinaryFileFormat._

private var testDir: String = _
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala
index 07e2849ce6f..264fb61db16 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala
Expand Down Expand Up @@ -774,29 +798,26 @@ index 26e61c6b58d..2a7c96d164a 100644
spark.range(10).selectExpr("id", "id % 3 as p")
.write.partitionBy("p").saveAsTable("testDataForScan")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
index 0ab8691801d..df9e47fdc7a 100644
index 0ab8691801d..7b8590ee6e1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
@@ -18,9 +18,9 @@
@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.python

import org.apache.spark.sql.catalyst.plans.logical.{ArrowEvalPython, BatchEvalPython, Limit, LocalLimit}
+import org.apache.spark.sql.comet.CometScanExec
+import org.apache.spark.sql.comet._
import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan, SparkPlanTest}
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
-import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
@@ -108,6 +108,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
@@ -108,6 +109,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {

val scanNodes = query.queryExecution.executedPlan.collect {
case scan: FileSourceScanExec => scan
+ case scan: CometScanExec => scan
}
assert(scanNodes.length == 1)
assert(scanNodes.head.output.map(_.name) == Seq("a"))
@@ -120,11 +121,16 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
@@ -120,11 +122,16 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {

val scanNodes = query.queryExecution.executedPlan.collect {
case scan: FileSourceScanExec => scan
Expand All @@ -815,34 +836,38 @@ index 0ab8691801d..df9e47fdc7a 100644
}
}
}
@@ -145,6 +151,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
@@ -145,6 +152,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {

val scanNodes = query.queryExecution.executedPlan.collect {
case scan: BatchScanExec => scan
+ case scan: CometScanExec => scan
}
assert(scanNodes.length == 1)
assert(scanNodes.head.output.map(_.name) == Seq("a"))
@@ -157,12 +164,16 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
@@ -157,6 +165,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {

val scanNodes = query.queryExecution.executedPlan.collect {
case scan: BatchScanExec => scan
+ case scan: CometScanExec => scan
+ case scan: CometBatchScanExec => scan
}
assert(scanNodes.length == 1)
// $"a" is not null and $"a" > 1
- val filters = scanNodes.head.scan.asInstanceOf[ParquetScan].pushedFilters
- assert(filters.length == 2)
- assert(filters.flatMap(_.references).distinct === Array("a"))
+ val dataFilters = scanNodes.head match {
+ case scan: FileSourceScanExec => scan.dataFilters
+ case scan: CometScanExec => scan.dataFilters
+ }
+ assert(dataFilters.length == 2)
+ assert(dataFilters.flatMap(_.references.map(_.name)).distinct == Seq("a"))
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecutionSuite.scala
index d083cac48ff..43057eb251b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecutionSuite.scala
@@ -37,8 +37,10 @@ import org.apache.spark.sql.streaming.{StreamingQuery, StreamingQueryException,
import org.apache.spark.sql.streaming.util.StreamManualClock
import org.apache.spark.util.Utils

+// For some reason this suite is flaky w/ or w/o Comet when running in Github workflow.
+// Since it isn't related to Comet, we disable it for now.
class AsyncProgressTrackingMicroBatchExecutionSuite
- extends StreamTest with BeforeAndAfter with Matchers {
+ extends StreamTest with BeforeAndAfter with Matchers with DisableCometSuite {

import testImplicits._

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index 266bb343526..85ec36db996 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
Expand Down Expand Up @@ -954,6 +979,31 @@ index 266bb343526..85ec36db996 100644
} else {
assert(scans.isEmpty)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
index b5f6d2f9f68..8e84ec3f070 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.sources
import java.io.File

import org.apache.spark.SparkException
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.{AnalysisException, DisableCometSuite}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTableType}
import org.apache.spark.sql.catalyst.parser.ParseException
@@ -28,7 +28,10 @@ import org.apache.spark.sql.internal.SQLConf.BUCKETING_MAX_BUCKETS
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.Utils

-class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession {
+// For some reason this suite is flaky w/ or w/o Comet when running in Github workflow.
+// Since it isn't related to Comet, we disable it for now.
+class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession
+ with DisableCometSuite {
import testImplicits._

protected override lazy val sql = spark.sql _
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala
index 1f55742cd67..42377f7cf26 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala
Expand Down

0 comments on commit 17d638f

Please sign in to comment.