Merge branch 'master' into variant_to_json
chenhao-db committed Mar 25, 2024
2 parents c2b9bb5 + d8d119a commit 47250f4
Showing 32 changed files with 618 additions and 124 deletions.
7 changes: 5 additions & 2 deletions .github/labeler.yml
@@ -129,8 +129,11 @@ DSTREAM:
- any-glob-to-any-file: [
'streaming/**/*',
'data/streaming/**/*',
'connector/kinesis*',
'connector/kafka*',
'connector/kinesis-asl/**/*',
'connector/kinesis-asl-assembly/**/*',
'connector/kafka-0-10/**/*',
'connector/kafka-0-10-assembly/**/*',
'connector/kafka-0-10-token-provider/**/*',
'python/pyspark/streaming/**/*'
]

2 changes: 1 addition & 1 deletion .github/workflows/publish_snapshot.yml
@@ -21,7 +21,7 @@ name: Publish Snapshot

on:
schedule:
- cron: '0 0 * * *'
- cron: '0 0,12 * * *'
workflow_dispatch:
inputs:
branch:
2 changes: 1 addition & 1 deletion assembly/README
@@ -9,4 +9,4 @@ This module is off by default. To activate it specify the profile in the command

If you need to build an assembly for a different version of Hadoop the
hadoop-version system property needs to be set as in this example:
-Dhadoop.version=3.3.6
-Dhadoop.version=3.4.0
@@ -18,7 +18,7 @@
package org.apache.spark.unsafe.types;

import org.apache.spark.unsafe.Platform;
import org.apache.spark.variant.Variant;
import org.apache.spark.types.variant.Variant;

import java.io.Serializable;
import java.util.Arrays;
@@ -15,15 +15,15 @@
* limitations under the License.
*/

package org.apache.spark.variant;
package org.apache.spark.types.variant;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;

import java.io.CharArrayWriter;
import java.io.IOException;

import static org.apache.spark.variant.VariantUtil.*;
import static org.apache.spark.types.variant.VariantUtil.*;

/**
* This class is structurally equivalent to {@link org.apache.spark.unsafe.types.VariantVal}. We
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.variant;
package org.apache.spark.types.variant;

import java.io.IOException;
import java.math.BigDecimal;
@@ -32,7 +32,7 @@
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.exc.InputCoercionException;

import static org.apache.spark.variant.VariantUtil.*;
import static org.apache.spark.types.variant.VariantUtil.*;

/**
* Build variant value and metadata by parsing JSON values.
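For orientation, a minimal sketch of how the relocated builder might be exercised; `VariantBuilder.parseJson` is assumed from the class doc above, and the pairing with `VariantVal` comes from the "structurally equivalent" Javadoc earlier in this diff — names and signatures are illustrative, not verified against this exact revision.

```scala
// Illustrative sketch only: method names are assumptions based on the class docs in this diff.
import org.apache.spark.types.variant.{Variant, VariantBuilder}

// Parse a JSON document into the variant binary form (value + metadata buffers).
val parsed: Variant = VariantBuilder.parseJson("""{"status": "merrily", "count": 3}""")

// The resulting value/metadata pair is what org.apache.spark.unsafe.types.VariantVal
// wraps on the SQL side, per the Javadoc in the earlier hunk.
```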
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.variant;
package org.apache.spark.types.variant;

/**
* An exception indicating that we are attempting to build a variant with its value or metadata
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.variant;
package org.apache.spark.types.variant;

import org.apache.spark.QueryContext;
import org.apache.spark.SparkRuntimeException;
@@ -79,8 +79,10 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite {
conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', 'fox', " +
"'jumps', 'over', 'the', 'lazy', 'dog', '{\"status\": \"merrily\"}')").executeUpdate()

conn.prepareStatement("CREATE TABLE floats (f1 FLOAT, f2 FLOAT UNSIGNED)").executeUpdate()
conn.prepareStatement("INSERT INTO floats VALUES (1.23, 4.56)").executeUpdate()
conn.prepareStatement("CREATE TABLE floats (f1 FLOAT, f2 FLOAT(10), f3 FLOAT(53), " +
"f4 FLOAT UNSIGNED, f5 FLOAT(10) UNSIGNED, f6 FLOAT(53) UNSIGNED)").executeUpdate()
conn.prepareStatement("INSERT INTO floats VALUES (1.23, 4.56, 7.89, 1.23, 4.56, 7.89)")
.executeUpdate()
}

test("Basic test") {
@@ -267,6 +269,6 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite {

test("SPARK-47522: Read MySQL FLOAT as FloatType to keep consistent with the write side") {
val df = spark.read.jdbc(jdbcUrl, "floats", new Properties)
checkAnswer(df, Row(1.23f, 4.56d))
checkAnswer(df, Row(1.23f, 4.56f, 7.89d, 1.23d, 4.56d, 7.89d))
}
}
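A hedged sketch of what the new mapping implies when reading the `floats` table above; the Spark types are inferred from the expected Row in the test (Float for FLOAT/FLOAT(10), Double for FLOAT(53) and the UNSIGNED variants), not taken from the dialect code itself.

```scala
// Sketch under the assumptions above; `jdbcUrl` is the suite's MySQL connection string.
import java.util.Properties
import org.apache.spark.sql.types.{DoubleType, FloatType}

val floats = spark.read.jdbc(jdbcUrl, "floats", new Properties)
assert(floats.schema("f1").dataType == FloatType)   // FLOAT
assert(floats.schema("f2").dataType == FloatType)   // FLOAT(10)
assert(floats.schema("f3").dataType == DoubleType)  // FLOAT(53)
assert(floats.schema("f4").dataType == DoubleType)  // FLOAT UNSIGNED
```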
@@ -732,9 +732,10 @@ private[spark] class SparkSubmit extends Logging {
}

// Add the application jar automatically so the user doesn't have to call sc.addJar
// For isKubernetesClusterModeDriver, the jar is already added in the previous spark-submit
// For YARN cluster mode, the jar is already distributed on each node as "app.jar"
// For python and R files, the primary resource is already distributed as a regular file
if (!isYarnCluster && !args.isPython && !args.isR) {
if (!isKubernetesClusterModeDriver && !isYarnCluster && !args.isPython && !args.isR) {
var jars = sparkConf.get(JARS)
if (isUserJar(args.primaryResource)) {
jars = jars ++ Seq(args.primaryResource)
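For context, a rough reconstruction of when the new guard fires, inferred from the SPARK-47495 test added below rather than from SparkSubmit itself — the exact predicate in the real code may differ.

```scala
// Rough reconstruction (assumption, inferred from the SPARK-47495 test below), not the
// actual SparkSubmit code: the driver launched inside a pod re-runs spark-submit in
// client mode with spark.kubernetes.submitInDriver=true, so the primary resource was
// already shipped and must not be appended to spark.jars a second time.
def isKubernetesClusterModeDriver(
    master: String,
    deployMode: String,
    submitInDriver: Boolean): Boolean = {
  master.startsWith("k8s://") && deployMode == "client" && submitInDriver
}
```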
19 changes: 19 additions & 0 deletions core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -504,6 +504,25 @@ class SparkSubmitSuite
}
}

test("SPARK-47495: Not to add primary resource to jars again" +
" in k8s client mode & driver runs inside a POD") {
val clArgs = Seq(
"--deploy-mode", "client",
"--proxy-user", "test.user",
"--master", "k8s://host:port",
"--executor-memory", "1g",
"--class", "org.SomeClass",
"--driver-memory", "1g",
"--conf", "spark.kubernetes.submitInDriver=true",
"--jars", "src/test/resources/TestUDTF.jar",
"/home/jarToIgnore.jar",
"arg1")
val appArgs = new SparkSubmitArguments(clArgs)
val (_, _, sparkConf, _) = submit.prepareSubmitEnvironment(appArgs)
sparkConf.get("spark.jars").contains("jarToIgnore") shouldBe false
sparkConf.get("spark.jars").contains("TestUDTF") shouldBe true
}

test("SPARK-33782: handles k8s files download to current directory") {
val clArgs = Seq(
"--deploy-mode", "client",
7 changes: 1 addition & 6 deletions dev/.scalafmt.conf
@@ -26,10 +26,5 @@ optIn = {
danglingParentheses.preset = false
docstrings.style = Asterisk
maxColumn = 98
runner.dialect = scala212
fileOverride {
"glob:**/src/**/scala-2.13/**.scala" {
runner.dialect = scala213
}
}
runner.dialect = scala213
version = 3.8.0
58 changes: 29 additions & 29 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -16,10 +16,10 @@ antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar
aopalliance-repackaged/3.0.3//aopalliance-repackaged-3.0.3.jar
arpack/3.0.3//arpack-3.0.3.jar
arpack_combined_all/0.1//arpack_combined_all-0.1.jar
arrow-format/15.0.0//arrow-format-15.0.0.jar
arrow-memory-core/15.0.0//arrow-memory-core-15.0.0.jar
arrow-memory-netty/15.0.0//arrow-memory-netty-15.0.0.jar
arrow-vector/15.0.0//arrow-vector-15.0.0.jar
arrow-format/15.0.2//arrow-format-15.0.2.jar
arrow-memory-core/15.0.2//arrow-memory-core-15.0.2.jar
arrow-memory-netty/15.0.2//arrow-memory-netty-15.0.2.jar
arrow-vector/15.0.2//arrow-vector-15.0.2.jar
audience-annotations/0.12.0//audience-annotations-0.12.0.jar
avro-ipc/1.11.3//avro-ipc-1.11.3.jar
avro-mapred/1.11.3//avro-mapred-1.11.3.jar
@@ -197,32 +197,32 @@ metrics-jmx/4.2.25//metrics-jmx-4.2.25.jar
metrics-json/4.2.25//metrics-json-4.2.25.jar
metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar
minlog/1.3.0//minlog-1.3.0.jar
netty-all/4.1.107.Final//netty-all-4.1.107.Final.jar
netty-buffer/4.1.107.Final//netty-buffer-4.1.107.Final.jar
netty-codec-http/4.1.107.Final//netty-codec-http-4.1.107.Final.jar
netty-codec-http2/4.1.107.Final//netty-codec-http2-4.1.107.Final.jar
netty-codec-socks/4.1.107.Final//netty-codec-socks-4.1.107.Final.jar
netty-codec/4.1.107.Final//netty-codec-4.1.107.Final.jar
netty-common/4.1.107.Final//netty-common-4.1.107.Final.jar
netty-handler-proxy/4.1.107.Final//netty-handler-proxy-4.1.107.Final.jar
netty-handler/4.1.107.Final//netty-handler-4.1.107.Final.jar
netty-resolver/4.1.107.Final//netty-resolver-4.1.107.Final.jar
netty-all/4.1.108.Final//netty-all-4.1.108.Final.jar
netty-buffer/4.1.108.Final//netty-buffer-4.1.108.Final.jar
netty-codec-http/4.1.108.Final//netty-codec-http-4.1.108.Final.jar
netty-codec-http2/4.1.108.Final//netty-codec-http2-4.1.108.Final.jar
netty-codec-socks/4.1.108.Final//netty-codec-socks-4.1.108.Final.jar
netty-codec/4.1.108.Final//netty-codec-4.1.108.Final.jar
netty-common/4.1.108.Final//netty-common-4.1.108.Final.jar
netty-handler-proxy/4.1.108.Final//netty-handler-proxy-4.1.108.Final.jar
netty-handler/4.1.108.Final//netty-handler-4.1.108.Final.jar
netty-resolver/4.1.108.Final//netty-resolver-4.1.108.Final.jar
netty-tcnative-boringssl-static/2.0.61.Final//netty-tcnative-boringssl-static-2.0.61.Final.jar
netty-tcnative-boringssl-static/2.0.62.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.62.Final-linux-aarch_64.jar
netty-tcnative-boringssl-static/2.0.62.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.62.Final-linux-x86_64.jar
netty-tcnative-boringssl-static/2.0.62.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.62.Final-osx-aarch_64.jar
netty-tcnative-boringssl-static/2.0.62.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.62.Final-osx-x86_64.jar
netty-tcnative-boringssl-static/2.0.62.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.62.Final-windows-x86_64.jar
netty-tcnative-classes/2.0.62.Final//netty-tcnative-classes-2.0.62.Final.jar
netty-transport-classes-epoll/4.1.107.Final//netty-transport-classes-epoll-4.1.107.Final.jar
netty-transport-classes-kqueue/4.1.107.Final//netty-transport-classes-kqueue-4.1.107.Final.jar
netty-transport-native-epoll/4.1.107.Final/linux-aarch_64/netty-transport-native-epoll-4.1.107.Final-linux-aarch_64.jar
netty-transport-native-epoll/4.1.107.Final/linux-riscv64/netty-transport-native-epoll-4.1.107.Final-linux-riscv64.jar
netty-transport-native-epoll/4.1.107.Final/linux-x86_64/netty-transport-native-epoll-4.1.107.Final-linux-x86_64.jar
netty-transport-native-kqueue/4.1.107.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.107.Final-osx-aarch_64.jar
netty-transport-native-kqueue/4.1.107.Final/osx-x86_64/netty-transport-native-kqueue-4.1.107.Final-osx-x86_64.jar
netty-transport-native-unix-common/4.1.107.Final//netty-transport-native-unix-common-4.1.107.Final.jar
netty-transport/4.1.107.Final//netty-transport-4.1.107.Final.jar
netty-tcnative-boringssl-static/2.0.65.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar
netty-tcnative-boringssl-static/2.0.65.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar
netty-tcnative-boringssl-static/2.0.65.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar
netty-tcnative-boringssl-static/2.0.65.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar
netty-tcnative-boringssl-static/2.0.65.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar
netty-tcnative-classes/2.0.65.Final//netty-tcnative-classes-2.0.65.Final.jar
netty-transport-classes-epoll/4.1.108.Final//netty-transport-classes-epoll-4.1.108.Final.jar
netty-transport-classes-kqueue/4.1.108.Final//netty-transport-classes-kqueue-4.1.108.Final.jar
netty-transport-native-epoll/4.1.108.Final/linux-aarch_64/netty-transport-native-epoll-4.1.108.Final-linux-aarch_64.jar
netty-transport-native-epoll/4.1.108.Final/linux-riscv64/netty-transport-native-epoll-4.1.108.Final-linux-riscv64.jar
netty-transport-native-epoll/4.1.108.Final/linux-x86_64/netty-transport-native-epoll-4.1.108.Final-linux-x86_64.jar
netty-transport-native-kqueue/4.1.108.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.108.Final-osx-aarch_64.jar
netty-transport-native-kqueue/4.1.108.Final/osx-x86_64/netty-transport-native-kqueue-4.1.108.Final-osx-x86_64.jar
netty-transport-native-unix-common/4.1.108.Final//netty-transport-native-unix-common-4.1.108.Final.jar
netty-transport/4.1.108.Final//netty-transport-4.1.108.Final.jar
objenesis/3.3//objenesis-3.3.jar
okhttp/3.12.12//okhttp-3.12.12.jar
okio/1.15.0//okio-1.15.0.jar
10 changes: 0 additions & 10 deletions dev/free_disk_space_container
@@ -31,14 +31,4 @@ rm -rf /__t/CodeQL
rm -rf /__t/go
rm -rf /__t/node

apt-get remove --purge -y '^aspnet.*'
apt-get remove --purge -y '^dotnet-.*'
apt-get remove --purge -y '^llvm-.*'
apt-get remove --purge -y 'php.*'
apt-get remove --purge -y '^mongodb-.*'
apt-get remove --purge -y snapd google-chrome-stable microsoft-edge-stable firefox
apt-get remove --purge -y azure-cli google-cloud-sdk mono-devel powershell libgl1-mesa-dri
apt-get autoremove --purge -y
apt-get clean

df -h
8 changes: 8 additions & 0 deletions dev/infra/Dockerfile
@@ -135,3 +135,11 @@ RUN python3.12 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS lxml && \
python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
python3.12 -m pip install torcheval && \
python3.12 -m pip cache purge

# Remove unused installation packages to free up disk space
RUN apt-get remove --purge -y \
'^aspnet.*' '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*' \
snapd google-chrome-stable microsoft-edge-stable firefox \
azure-cli google-cloud-sdk mono-devel powershell libgl1-mesa-dri || true
RUN apt-get autoremove --purge -y
RUN apt-get clean
2 changes: 1 addition & 1 deletion docs/building-spark.md
@@ -79,7 +79,7 @@ from `hadoop.version`.

Example:

./build/mvn -Pyarn -Dhadoop.version=3.3.0 -DskipTests clean package
./build/mvn -Pyarn -Dhadoop.version=3.4.0 -DskipTests clean package

## Building With Hive and JDBC Support

2 changes: 1 addition & 1 deletion docs/running-on-kubernetes.md
@@ -236,7 +236,7 @@ A typical example of this using S3 is via passing the following options:

```
...
--packages org.apache.hadoop:hadoop-aws:3.2.2
--packages org.apache.hadoop:hadoop-aws:3.4.0
--conf spark.kubernetes.file.upload.path=s3a://<s3-bucket>/path
--conf spark.hadoop.fs.s3a.access.key=...
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
8 changes: 4 additions & 4 deletions pom.xml
@@ -218,14 +218,14 @@
<bouncycastle.version>1.77</bouncycastle.version>
<tink.version>1.9.0</tink.version>
<datasketches.version>5.0.1</datasketches.version>
<netty.version>4.1.107.Final</netty.version>
<netty-tcnative.version>2.0.62.Final</netty-tcnative.version>
<netty.version>4.1.108.Final</netty.version>
<netty-tcnative.version>2.0.65.Final</netty-tcnative.version>
<icu4j.version>72.1</icu4j.version>
<!--
If you are changing Arrow version specification, please check
./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too.
-->
<arrow.version>15.0.0</arrow.version>
<arrow.version>15.0.2</arrow.version>
<ammonite.version>3.0.0-M1</ammonite.version>

<!-- org.fusesource.leveldbjni will be used except on arm64 platform. -->
@@ -1202,7 +1202,7 @@
<groupId>org.jmock</groupId>
<artifactId>jmock-junit5</artifactId>
<scope>test</scope>
<version>2.12.0</version>
<version>2.13.1</version>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>