diff --git a/connect/common/src/main/protobuf/spark/connect/expressions.proto b/connect/common/src/main/protobuf/spark/connect/expressions.proto index 860e923576161..3a91371fd3b25 100644 --- a/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -51,6 +51,7 @@ message Expression { CallFunction call_function = 16; NamedArgumentExpression named_argument_expression = 17; MergeAction merge_action = 19; + TypedAggregateExpression typed_aggregate_expression = 20; // This field is used to mark extensions to the protocol. When plugins generate arbitrary // relations they can add them here. During the planning the correct resolution is done. @@ -402,6 +403,11 @@ message JavaUDF { bool aggregate = 3; } +message TypedAggregateExpression { + // (Required) The aggregate function object packed into bytes. + ScalarScalaUDF scalar_scala_udf = 1; +} + message CallFunction { // (Required) Unparsed name of the SQL function. string function_name = 1; diff --git a/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 449e923beae3a..4702f09a14c29 100644 --- a/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -51,7 +51,7 @@ import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, Mu import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UnboundRowEncoder import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.BloomFilterAggregate +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, BloomFilterAggregate} import org.apache.spark.sql.catalyst.parser.{ParseException, ParserUtils} import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter, UsingJoin} import org.apache.spark.sql.catalyst.plans.logical @@ -67,6 +67,7 @@ import org.apache.spark.sql.connect.service.{ExecuteHolder, SessionHolder, Spark import org.apache.spark.sql.connect.utils.MetricGenerator import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.execution.arrow.ArrowConverters import org.apache.spark.sql.execution.command.CreateViewCommand import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -1455,7 +1456,7 @@ class SparkConnectPlanner( } val projection = rel.getExpressionsList.asScala.toSeq - .map(transformExpression) + .map(transformExpression(_, Some(baseRel))) .map(toNamedExpression) logical.Project(projectList = projection, child = baseRel) @@ -1472,21 +1473,40 @@ class SparkConnectPlanner( * Catalyst expression */ @DeveloperApi - def transformExpression(exp: proto.Expression): Expression = if (exp.hasCommon) { + def transformExpression(exp: proto.Expression): Expression = transformExpression(exp, None) + + /** + * Transforms an input protobuf expression into the Catalyst expression. This is usually not + * called directly. Typically the planner will traverse the expressions automatically, only + * plugins are expected to manually perform expression transformations. + * + * @param exp + * the input expression + * @param baseRelationOpt + * inputs of the base relation that contains this expression + * @return + * Catalyst expression + */ + @DeveloperApi + def transformExpression( + exp: proto.Expression, + baseRelationOpt: Option[LogicalPlan]): Expression = if (exp.hasCommon) { try { val origin = exp.getCommon.getOrigin PySparkCurrentOrigin.set( origin.getPythonOrigin.getFragment, origin.getPythonOrigin.getCallSite) - withOrigin { doTransformExpression(exp) } + withOrigin { doTransformExpression(exp, baseRelationOpt) } } finally { PySparkCurrentOrigin.clear() } } else { - doTransformExpression(exp) + doTransformExpression(exp, baseRelationOpt) } - private def doTransformExpression(exp: proto.Expression): Expression = { + private def doTransformExpression( + exp: proto.Expression, + baseRelationOpt: Option[LogicalPlan]): Expression = { exp.getExprTypeCase match { case proto.Expression.ExprTypeCase.LITERAL => transformLiteral(exp.getLiteral) case proto.Expression.ExprTypeCase.UNRESOLVED_ATTRIBUTE => @@ -1523,6 +1543,8 @@ class SparkConnectPlanner( transformNamedArgumentExpression(exp.getNamedArgumentExpression) case proto.Expression.ExprTypeCase.MERGE_ACTION => transformMergeAction(exp.getMergeAction) + case proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION => + transformTypedAggregateExpression(exp.getTypedAggregateExpression, baseRelationOpt) case _ => throw InvalidPlanInput( s"Expression with ID: ${exp.getExprTypeCase.getNumber} is not supported") @@ -2584,8 +2606,35 @@ class SparkConnectPlanner( if expr.getUnresolvedFunction.getFunctionName == "reduce" => // The reduce func needs the input data attribute, thus handle it specially here transformTypedReduceExpression(expr.getUnresolvedFunction, plan.output) - case _ => transformExpression(expr) + case _ => transformExpression(expr, Some(plan)) + } + } + + private def transformTypedAggregateExpression( + expr: proto.TypedAggregateExpression, + baseRelationOpt: Option[LogicalPlan]): AggregateExpression = { + val udf = expr.getScalarScalaUdf + assert(udf.getAggregate) + + val udfPacket = unpackScalaUDF[UdfPacket](udf) + assert(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") + + val aggregator = udfPacket.function.asInstanceOf[Aggregator[Any, Any, Any]] + val tae = + TypedAggregateExpression(aggregator)(aggregator.bufferEncoder, aggregator.outputEncoder) + val taeWithInput = baseRelationOpt match { + case Some(baseRelation) => + val inputEncoder = TypedScalaUdf.encoderFor( + udfPacket.inputEncoders.head, + "input", + Some(baseRelation.output)) + TypedAggUtils + .withInputType(tae, inputEncoder, baseRelation.output) + .asInstanceOf[TypedAggregateExpression] + case _ => + tae } + taeWithInput.toAggregateExpression() } private def transformMergeAction(action: proto.MergeAction): MergeAction = { diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala index 91c8fb57c31bf..3dabcdef1567e 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala @@ -17,7 +17,11 @@ package org.apache.spark.sql.expressions -import org.apache.spark.sql.{Encoder, TypedColumn} +import scala.reflect.runtime.universe._ + +import org.apache.spark.connect.proto +import org.apache.spark.sql.{encoderFor, Encoder, TypedColumn} +import org.apache.spark.sql.catalyst.ScalaReflection /** * A base class for user-defined aggregations, which can be used in `Dataset` operations to take @@ -92,9 +96,52 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable { def outputEncoder: Encoder[OUT] /** - * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset`. operations. + * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset` operations. + * @since 4.0.0 */ def toColumn: TypedColumn[IN, OUT] = { - throw new UnsupportedOperationException("toColumn is not implemented.") + val ttpe = getInputTypeTag[IN] + val inputEncoder = ScalaReflection.encoderFor(ttpe) + val udaf = + ScalaUserDefinedFunction( + this, + Seq(inputEncoder), + encoderFor(outputEncoder), + aggregate = true) + + val builder = proto.TypedAggregateExpression.newBuilder() + builder.setScalarScalaUdf(udaf.udf) + val expr = proto.Expression.newBuilder().setTypedAggregateExpression(builder).build() + + new TypedColumn(expr, encoderFor(outputEncoder)) + } + + private final def getInputTypeTag[T]: TypeTag[T] = { + val mirror = runtimeMirror(this.getClass.getClassLoader) + val tpe = mirror.classSymbol(this.getClass).toType + // Find the most generic (last in the tree) Aggregator class + val baseAgg = + tpe.baseClasses + .findLast(_.asClass.toType <:< typeOf[Aggregator[_, _, _]]) + .getOrElse(throw new IllegalStateException("Could not find the Aggregator base class.")) + val typeArgs = tpe.baseType(baseAgg).typeArgs + assert( + typeArgs.length == 3, + s"Aggregator should have 3 type arguments, " + + s"but found ${typeArgs.length}: ${typeArgs.mkString}.") + val inType = typeArgs.head + + import scala.reflect.api._ + TypeTag( + mirror, + new TypeCreator { + def apply[U <: Universe with Singleton](m: Mirror[U]): U#Type = + if (m eq mirror) { + inType.asInstanceOf[U#Type] + } else { + throw new IllegalArgumentException( + s"Type tag defined in $mirror cannot be migrated to other mirrors.") + } + }) } } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index f4499858306a1..dcf7f67551d30 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -107,7 +107,7 @@ case class ScalaUserDefinedFunction private[sql] ( aggregate: Boolean) extends UserDefinedFunction { - private[this] lazy val udf = { + private[expressions] lazy val udf = { val scalaUdfBuilder = proto.ScalarScalaUDF .newBuilder() .setPayload(ByteString.copyFrom(serializedUdfPacket)) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala index 4032a9499c448..4aec0e6348c09 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala @@ -367,17 +367,7 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession test("UDAF custom Aggregator - case class as input types") { val session: SparkSession = spark import session.implicits._ - val agg = new Aggregator[UdafTestInput, (Long, Long), Long] { - override def zero: (Long, Long) = (0L, 0L) - override def reduce(b: (Long, Long), a: UdafTestInput): (Long, Long) = - (b._1 + a.id, b._2 + a.extra) - override def merge(b1: (Long, Long), b2: (Long, Long)): (Long, Long) = - (b1._1 + b2._1, b1._2 + b2._2) - override def finish(reduction: (Long, Long)): Long = reduction._1 + reduction._2 - override def bufferEncoder: Encoder[(Long, Long)] = - Encoders.tuple(Encoders.scalaLong, Encoders.scalaLong) - override def outputEncoder: Encoder[Long] = Encoders.scalaLong - } + val agg = new CompleteUdafTestInputAggregator() spark.udf.register("agg", udaf(agg)) val result = spark .range(10) @@ -388,6 +378,66 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession .head() assert(result == 135) // 45 + 90 } + + test("UDAF custom Aggregator - toColumn") { + val session: SparkSession = spark + import session.implicits._ + val aggCol = new CompleteUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).as[UdafTestInput] + + assert(ds.select(aggCol).head() == 135) // 45 + 90 + assert(ds.agg(aggCol).head().getLong(0) == 135) // 45 + 90 + } + + test("UDAF custom Aggregator - multiple extends - toColumn") { + val session: SparkSession = spark + import session.implicits._ + val aggCol = new CompleteGrandChildUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).as[UdafTestInput] + + assert(ds.select(aggCol).head() == 540) // (45 + 90) * 4 + assert(ds.agg(aggCol).head().getLong(0) == 540) // (45 + 90) * 4 + } } case class UdafTestInput(id: Long, extra: Long) + +// An Aggregator that takes [[UdafTestInput]] as input. +final class CompleteUdafTestInputAggregator + extends Aggregator[UdafTestInput, (Long, Long), Long] { + override def zero: (Long, Long) = (0L, 0L) + override def reduce(b: (Long, Long), a: UdafTestInput): (Long, Long) = + (b._1 + a.id, b._2 + a.extra) + override def merge(b1: (Long, Long), b2: (Long, Long)): (Long, Long) = + (b1._1 + b2._1, b1._2 + b2._2) + override def finish(reduction: (Long, Long)): Long = reduction._1 + reduction._2 + override def bufferEncoder: Encoder[(Long, Long)] = + Encoders.tuple(Encoders.scalaLong, Encoders.scalaLong) + override def outputEncoder: Encoder[Long] = Encoders.scalaLong +} + +// Same as [[CompleteUdafTestInputAggregator]] but the input type is not defined. +abstract class IncompleteUdafTestInputAggregator[T] extends Aggregator[T, (Long, Long), Long] { + override def zero: (Long, Long) = (0L, 0L) + override def reduce(b: (Long, Long), a: T): (Long, Long) // Incomplete! + override def merge(b1: (Long, Long), b2: (Long, Long)): (Long, Long) = + (b1._1 + b2._1, b1._2 + b2._2) + override def finish(reduction: (Long, Long)): Long = reduction._1 + reduction._2 + override def bufferEncoder: Encoder[(Long, Long)] = + Encoders.tuple(Encoders.scalaLong, Encoders.scalaLong) + override def outputEncoder: Encoder[Long] = Encoders.scalaLong +} + +// A layer over [[IncompleteUdafTestInputAggregator]] but the input type is still not defined. +abstract class IncompleteChildUdafTestInputAggregator[T] + extends IncompleteUdafTestInputAggregator[T] { + override def finish(reduction: (Long, Long)): Long = (reduction._1 + reduction._2) * 2 +} + +// Another layer that finally defines the input type. +final class CompleteGrandChildUdafTestInputAggregator + extends IncompleteChildUdafTestInputAggregator[UdafTestInput] { + override def reduce(b: (Long, Long), a: UdafTestInput): (Long, Long) = + (b._1 + a.id, b._2 + a.extra) + override def finish(reduction: (Long, Long)): Long = (reduction._1 + reduction._2) * 4 +} diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index b4c4b48de2688..1c1ad2b6ecec5 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -34,7 +34,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xd8/\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_conditionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xc1\x30\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_conditionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -47,75 +47,77 @@ b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" ) _EXPRESSION._serialized_start = 133 - _EXPRESSION._serialized_end = 6237 - _EXPRESSION_WINDOW._serialized_start = 1795 - _EXPRESSION_WINDOW._serialized_end = 2578 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 2085 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2578 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2352 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2497 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2499 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end = 2578 - _EXPRESSION_SORTORDER._serialized_start = 2581 - _EXPRESSION_SORTORDER._serialized_end = 3006 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2811 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 2919 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 2921 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 3006 - _EXPRESSION_CAST._serialized_start = 3009 - _EXPRESSION_CAST._serialized_end = 3324 - _EXPRESSION_CAST_EVALMODE._serialized_start = 3210 - _EXPRESSION_CAST_EVALMODE._serialized_end = 3308 - _EXPRESSION_LITERAL._serialized_start = 3327 - _EXPRESSION_LITERAL._serialized_end = 4890 - _EXPRESSION_LITERAL_DECIMAL._serialized_start = 4162 - _EXPRESSION_LITERAL_DECIMAL._serialized_end = 4279 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 4281 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4379 - _EXPRESSION_LITERAL_ARRAY._serialized_start = 4382 - _EXPRESSION_LITERAL_ARRAY._serialized_end = 4512 - _EXPRESSION_LITERAL_MAP._serialized_start = 4515 - _EXPRESSION_LITERAL_MAP._serialized_end = 4742 - _EXPRESSION_LITERAL_STRUCT._serialized_start = 4745 - _EXPRESSION_LITERAL_STRUCT._serialized_end = 4874 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4893 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 5079 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 5082 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 5286 - _EXPRESSION_EXPRESSIONSTRING._serialized_start = 5288 - _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5338 - _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5340 - _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5464 - _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5466 - _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5552 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5555 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5687 - _EXPRESSION_UPDATEFIELDS._serialized_start = 5690 - _EXPRESSION_UPDATEFIELDS._serialized_end = 5877 - _EXPRESSION_ALIAS._serialized_start = 5879 - _EXPRESSION_ALIAS._serialized_end = 5999 - _EXPRESSION_LAMBDAFUNCTION._serialized_start = 6002 - _EXPRESSION_LAMBDAFUNCTION._serialized_end = 6160 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 6162 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 6224 - _EXPRESSIONCOMMON._serialized_start = 6239 - _EXPRESSIONCOMMON._serialized_end = 6304 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 6307 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6671 - _PYTHONUDF._serialized_start = 6674 - _PYTHONUDF._serialized_end = 6878 - _SCALARSCALAUDF._serialized_start = 6881 - _SCALARSCALAUDF._serialized_end = 7095 - _JAVAUDF._serialized_start = 7098 - _JAVAUDF._serialized_end = 7247 - _CALLFUNCTION._serialized_start = 7249 - _CALLFUNCTION._serialized_end = 7357 - _NAMEDARGUMENTEXPRESSION._serialized_start = 7359 - _NAMEDARGUMENTEXPRESSION._serialized_end = 7451 - _MERGEACTION._serialized_start = 7454 - _MERGEACTION._serialized_end = 7966 - _MERGEACTION_ASSIGNMENT._serialized_start = 7676 - _MERGEACTION_ASSIGNMENT._serialized_end = 7782 - _MERGEACTION_ACTIONTYPE._serialized_start = 7785 - _MERGEACTION_ACTIONTYPE._serialized_end = 7952 + _EXPRESSION._serialized_end = 6342 + _EXPRESSION_WINDOW._serialized_start = 1900 + _EXPRESSION_WINDOW._serialized_end = 2683 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 2190 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2683 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2457 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2602 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2604 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end = 2683 + _EXPRESSION_SORTORDER._serialized_start = 2686 + _EXPRESSION_SORTORDER._serialized_end = 3111 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2916 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 3024 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 3026 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 3111 + _EXPRESSION_CAST._serialized_start = 3114 + _EXPRESSION_CAST._serialized_end = 3429 + _EXPRESSION_CAST_EVALMODE._serialized_start = 3315 + _EXPRESSION_CAST_EVALMODE._serialized_end = 3413 + _EXPRESSION_LITERAL._serialized_start = 3432 + _EXPRESSION_LITERAL._serialized_end = 4995 + _EXPRESSION_LITERAL_DECIMAL._serialized_start = 4267 + _EXPRESSION_LITERAL_DECIMAL._serialized_end = 4384 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 4386 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4484 + _EXPRESSION_LITERAL_ARRAY._serialized_start = 4487 + _EXPRESSION_LITERAL_ARRAY._serialized_end = 4617 + _EXPRESSION_LITERAL_MAP._serialized_start = 4620 + _EXPRESSION_LITERAL_MAP._serialized_end = 4847 + _EXPRESSION_LITERAL_STRUCT._serialized_start = 4850 + _EXPRESSION_LITERAL_STRUCT._serialized_end = 4979 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4998 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 5184 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 5187 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 5391 + _EXPRESSION_EXPRESSIONSTRING._serialized_start = 5393 + _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5443 + _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5445 + _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5569 + _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5571 + _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5657 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5660 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5792 + _EXPRESSION_UPDATEFIELDS._serialized_start = 5795 + _EXPRESSION_UPDATEFIELDS._serialized_end = 5982 + _EXPRESSION_ALIAS._serialized_start = 5984 + _EXPRESSION_ALIAS._serialized_end = 6104 + _EXPRESSION_LAMBDAFUNCTION._serialized_start = 6107 + _EXPRESSION_LAMBDAFUNCTION._serialized_end = 6265 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 6267 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 6329 + _EXPRESSIONCOMMON._serialized_start = 6344 + _EXPRESSIONCOMMON._serialized_end = 6409 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 6412 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6776 + _PYTHONUDF._serialized_start = 6779 + _PYTHONUDF._serialized_end = 6983 + _SCALARSCALAUDF._serialized_start = 6986 + _SCALARSCALAUDF._serialized_end = 7200 + _JAVAUDF._serialized_start = 7203 + _JAVAUDF._serialized_end = 7352 + _TYPEDAGGREGATEEXPRESSION._serialized_start = 7354 + _TYPEDAGGREGATEEXPRESSION._serialized_end = 7453 + _CALLFUNCTION._serialized_start = 7455 + _CALLFUNCTION._serialized_end = 7563 + _NAMEDARGUMENTEXPRESSION._serialized_start = 7565 + _NAMEDARGUMENTEXPRESSION._serialized_end = 7657 + _MERGEACTION._serialized_start = 7660 + _MERGEACTION._serialized_end = 8172 + _MERGEACTION_ASSIGNMENT._serialized_start = 7882 + _MERGEACTION_ASSIGNMENT._serialized_end = 7988 + _MERGEACTION_ACTIONTYPE._serialized_start = 7991 + _MERGEACTION_ACTIONTYPE._serialized_end = 8158 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index 2c80be6c8fb5a..1566eb1b1e9e2 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -1183,6 +1183,7 @@ class Expression(google.protobuf.message.Message): CALL_FUNCTION_FIELD_NUMBER: builtins.int NAMED_ARGUMENT_EXPRESSION_FIELD_NUMBER: builtins.int MERGE_ACTION_FIELD_NUMBER: builtins.int + TYPED_AGGREGATE_EXPRESSION_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property def common(self) -> global___ExpressionCommon: ... @@ -1225,6 +1226,8 @@ class Expression(google.protobuf.message.Message): @property def merge_action(self) -> global___MergeAction: ... @property + def typed_aggregate_expression(self) -> global___TypedAggregateExpression: ... + @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done. @@ -1252,6 +1255,7 @@ class Expression(google.protobuf.message.Message): call_function: global___CallFunction | None = ..., named_argument_expression: global___NamedArgumentExpression | None = ..., merge_action: global___MergeAction | None = ..., + typed_aggregate_expression: global___TypedAggregateExpression | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., ) -> None: ... def HasField( @@ -1283,6 +1287,8 @@ class Expression(google.protobuf.message.Message): b"named_argument_expression", "sort_order", b"sort_order", + "typed_aggregate_expression", + b"typed_aggregate_expression", "unresolved_attribute", b"unresolved_attribute", "unresolved_extract_value", @@ -1330,6 +1336,8 @@ class Expression(google.protobuf.message.Message): b"named_argument_expression", "sort_order", b"sort_order", + "typed_aggregate_expression", + b"typed_aggregate_expression", "unresolved_attribute", b"unresolved_attribute", "unresolved_extract_value", @@ -1370,6 +1378,7 @@ class Expression(google.protobuf.message.Message): "call_function", "named_argument_expression", "merge_action", + "typed_aggregate_expression", "extension", ] | None @@ -1620,6 +1629,27 @@ class JavaUDF(google.protobuf.message.Message): global___JavaUDF = JavaUDF +class TypedAggregateExpression(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + SCALAR_SCALA_UDF_FIELD_NUMBER: builtins.int + @property + def scalar_scala_udf(self) -> global___ScalarScalaUDF: + """(Required) The aggregate function object packed into bytes.""" + def __init__( + self, + *, + scalar_scala_udf: global___ScalarScalaUDF | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["scalar_scala_udf", b"scalar_scala_udf"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["scalar_scala_udf", b"scalar_scala_udf"] + ) -> None: ... + +global___TypedAggregateExpression = TypedAggregateExpression + class CallFunction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor