Skip to content

Commit

Permalink
Backport "Follow-up for #185 add more tests and fix the comparison co…
Browse files Browse the repository at this point in the history
…ndition"

This backports #197

Author: hyukjinkwon <[email protected]>

Closes #199 from HyukjinKwon/backport-follwup.
  • Loading branch information
HyukjinKwon committed Nov 3, 2016
1 parent 9d03f9b commit 3aa1d9a
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ private[xml] object StaxXmlParserUtils {
// So, we need to check further to decide if this is a data or just
// a whitespace between them.
parser.next
if (parser.peek.isStartElement) {
skipChildren(parser)
}
}
if (parser.peek.isStartElement) {
skipChildren(parser)
}
case _: EndElement =>
shouldStop = checkEndElement(parser)
Expand Down
2 changes: 2 additions & 0 deletions src/test/resources/cars-no-indentation.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0"?>
<ROWSET><ROW><year>2012</year><make><name><name>Tesla</name></name></make><model>S</model><comment>No comment</comment></ROW><ROW><year>1997</year><make>Ford</make><model>E350</model><comment>Go get one now they are going fast</comment></ROW><ROW><year>2015</year><make>Chevy</make><model>Volt</model><comment>No</comment></ROW></ROWSET>
10 changes: 8 additions & 2 deletions src/test/scala/com/databricks/spark/xml/XmlSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class XmlSuite extends FunSuite with BeforeAndAfterAll {
// Paths to XML fixture files under src/test/resources, referenced by name in the tests.
val carsFile8859 = "src/test/resources/cars-iso-8859-1.xml"
val carsFileGzip = "src/test/resources/cars.xml.gz"
val carsFileBzip2 = "src/test/resources/cars.xml.bz2"
val carsNoIndentationFile = "src/test/resources/cars-no-indentation.xml"
val carsMixedAttrNoChildFile = "src/test/resources/cars-mixed-attr-no-child.xml"
val booksAttributesInNoChild = "src/test/resources/books-attributes-in-no-child.xml"
val carsUnbalancedFile = "src/test/resources/cars-unbalanced-elements.xml"
Expand Down Expand Up @@ -727,11 +728,16 @@ class XmlSuite extends FunSuite with BeforeAndAfterAll {
Seq(
StructField("child", StringType, nullable = true),
StructField("parent", nestedSchema, nullable = true)))
df.schema.printTreeString()
schema.printTreeString()
assert(df.schema == schema)
}

// The fixture has no whitespace or newlines between elements, so sibling elements
// (including the nested `make` of the first row) directly follow one another.
// Projecting only `model` must still yield the model of every row.
test("Skip and project correctly XML files without indentation") {
  val df = sqlContext.xmlFile(carsNoIndentationFile)
  // Only one column is selected, so index 0 is the `model` value of each row.
  val models = df.select("model").collect().map(_.getString(0)).toSet
  assert(models == Set("S", "E350", "Volt"))
}

test("Select correctly all child fields regardless of pushed down projection") {
val results = new XmlReader()
.withRowTag("book")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,20 @@ class StaxXmlParserUtilsSuite extends FunSuite with BeforeAndAfterAll {
}

test("Skip XML children") {
val input = <ROW><id>2</id><info>
<name>Sam Mad Dog Smith</name><amount><small>1</small><large>9</large></amount></info></ROW>
val input = <ROW><info>
<name>Sam Mad Dog Smith</name><amount><small>1</small>
<large>9</large></amount></info><abc>2</abc><test>2</test></ROW>
val reader = new ByteArrayInputStream(input.toString().getBytes)
val parser = factory.createXMLEventReader(reader)
// The first CHARACTERS event is the whitespace right after `<info>`, so skipping starts inside `info`.
StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.CHARACTERS)
StaxXmlParserUtils.skipChildren(parser)
assert(
parser.nextEvent().asEndElement().getName.getLocalPart == "id")
assert(parser.nextEvent().asEndElement().getName.getLocalPart == "info")
parser.next()
StaxXmlParserUtils.skipChildren(parser)
assert(parser.nextEvent().asEndElement().getName.getLocalPart == "abc")
parser.next()
StaxXmlParserUtils.skipChildren(parser)
assert(
parser.nextEvent().asEndElement().getName.getLocalPart == "info")
assert(parser.nextEvent().asEndElement().getName.getLocalPart == "test")
}
}

0 comments on commit 3aa1d9a

Please sign in to comment.