Skip to content

Commit

Permalink
[SPARK-49275][SQL] Fix return type nullness of the xpath expression
Browse files Browse the repository at this point in the history
The `xpath` expression incorrectly marks its return type as an array of non-null strings. However, it can actually return an array containing nulls. This can cause an NPE in code generation, for example in the query `select coalesce(xpath(repeat('<a></a>', id), 'a')[0], '') from range(1, 2)`.

It avoids potential failures in queries that use the `xpath` expression.

Does this PR introduce any user-facing change? No.

A new unit test. It would fail without the change in the PR.

Was this patch authored or co-authored using generative AI tooling? No.

Closes apache#47796 from chenhao-db/fix_xpath_nullness.

Authored-by: Chenhao Li <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
chenhao-db committed Sep 2, 2024
1 parent 38ad0e7 commit 79065ac
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,16 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract {
Examples:
> SELECT _FUNC_('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()');
["b1","b2","b3"]
> SELECT _FUNC_('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b');
[null,null,null]
""",
since = "2.0.0",
group = "xml_funcs")
// scalastyle:on line.size.limit
case class XPathList(xml: Expression, path: Expression) extends XPathExtract {
override def prettyName: String = "xpath"
override def dataType: DataType = ArrayType(StringType, containsNull = false)

override def dataType: DataType = ArrayType(StringType)

override def nullSafeEval(xml: Any, path: Any): Any = {
val nodeList = xpathUtil.evalNodeList(xml.asInstanceOf[UTF8String].toString, pathString)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ class XPathExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
testExpr("<a><b class='bb'>b1</b><b>b2</b><b>b3</b><c class='bb'>c1</c><c>c2</c></a>",
"a/*[@class='bb']/text()", Seq("b1", "c1"))

checkEvaluation(
Coalesce(Seq(
GetArrayItem(XPathList(Literal("<a></a>"), Literal("a")), Literal(0)),
Literal("nul"))), "nul")

testNullAndErrorBehavior(testExpr)
}

Expand Down

0 comments on commit 79065ac

Please sign in to comment.