Skip to content

Commit

Permalink
Remove check for Nulls buffer when null_count = 0 (facebookincubato…
Browse files Browse the repository at this point in the history
…r#941)

Summary:
Pull Request resolved: facebookincubator#941

In PyArrow 6.0, it's legit for an arrow array to have non-null NullBuffer while null_count is zero (see also pytorch/torcharrow#109) :
```
import pyarrow as pa
>>> pa.__version__
'6.0.0'
>>> a = pa.array([1, 2, None, 3])
>>> a = a.fill_null(12)
>>> a.buffers()
[<pyarrow.lib.Buffer object at 0x7fe688222330>, <pyarrow.lib.Buffer object at 0x7fe6680c2ef0>]
```

User also reports similiar issue when converting arrow array reading from Parquet, see pytorch/torcharrow#146 (comment)

Differential Revision: D33836988

fbshipit-source-id: c16fa29d9850f4e5fb73b4421a2d6de55ec4702c
  • Loading branch information
wenleix authored and facebook-github-bot committed Jan 29, 2022
1 parent 83416e1 commit 23d1bd7
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 44 deletions.
7 changes: 4 additions & 3 deletions velox/vector/arrow/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,9 +677,10 @@ VectorPtr importFromArrowImpl(
nulls = wrapInBufferView(
arrowArray.buffers[0], bits::nbytes(arrowArray.length));
} else {
VELOX_USER_CHECK_NULL(
arrowArray.buffers[0],
"Nulls buffer must be nullptr when null_count is zero.");
// It's legit for an arrow array to have non-null nulls buffer while
// null_count is zero.
//
// Converted Velox vector will not have null buffer.
}

// String data types (VARCHAR and VARBINARY).
Expand Down
103 changes: 62 additions & 41 deletions velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
template <typename T>
ArrowArray fillArrowArray(
const std::vector<std::optional<T>>& inputValues,
ArrowContextHolder& holder) {
ArrowContextHolder& holder,
bool alwaysHasNullsBuffer) {
int64_t length = inputValues.size();
int64_t nullCount = 0;

Expand All @@ -470,14 +471,17 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
}
}

holder.buffers[0] = (nullCount == 0) ? nullptr : (const void*)rawNulls;
holder.buffers[0] = (nullCount == 0 && !alwaysHasNullsBuffer)
? nullptr
: (const void*)rawNulls;
holder.buffers[1] = (length == 0) ? nullptr : (const void*)rawValues;
return makeArrowArray(holder.buffers, 2, length, nullCount);
}

ArrowArray fillArrowArray(
const std::vector<std::optional<std::string>>& inputValues,
ArrowContextHolder& holder) {
ArrowContextHolder& holder,
bool alwaysHasNullsBuffer) {
int64_t length = inputValues.size();
int64_t nullCount = 0;

Expand Down Expand Up @@ -518,7 +522,9 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
}
}

holder.buffers[0] = (nullCount == 0) ? nullptr : (const void*)rawNulls;
holder.buffers[0] = (nullCount == 0 && !alwaysHasNullsBuffer)
? nullptr
: (const void*)rawNulls;
return makeArrowArray(holder.buffers, 3, length, nullCount);
}

Expand All @@ -528,9 +534,10 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
template <typename T>
void testArrowImport(
const char* format,
const std::vector<std::optional<T>>& inputValues) {
const std::vector<std::optional<T>>& inputValues,
bool alwaysHasNullsBuffer) {
ArrowContextHolder holder;
auto arrowArray = fillArrowArray(inputValues, holder);
auto arrowArray = fillArrowArray(inputValues, holder, alwaysHasNullsBuffer);

auto arrowSchema = makeArrowSchema(format);
auto output = importFromArrow(arrowSchema, arrowArray, pool_.get());
Expand Down Expand Up @@ -563,37 +570,45 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
}
}

void testImportScalar() {
testArrowImport<bool>("b", {});
testArrowImport<bool>("b", {true});
testArrowImport<bool>("b", {false});
testArrowImport<bool>("b", {true, false, true});
testArrowImport<bool>("b", {true, std::nullopt, true});

testArrowImport<int8_t>("c", {});
testArrowImport<int8_t>("c", {101});
testArrowImport<int8_t>("c", {5, 4, 3, 1, 2});
testArrowImport<int8_t>("c", {8, std::nullopt, std::nullopt});
testArrowImport<int8_t>("c", {std::nullopt, std::nullopt});

testArrowImport<int16_t>("s", {5, 4, 3, 1, 2});
testArrowImport<int32_t>("i", {5, 4, 3, 1, 2});

testArrowImport<int64_t>("l", {});
testArrowImport<int64_t>("l", {std::nullopt});
testArrowImport<int64_t>("l", {-99, 4, 318321631, 1211, -12});
testArrowImport<int64_t>("l", {std::nullopt, 12345678, std::nullopt});
testArrowImport<int64_t>("l", {std::nullopt, std::nullopt});

testArrowImport<double>("g", {});
testArrowImport<double>("g", {std::nullopt});
testArrowImport<double>("g", {-99.9, 4.3, 31.1, 129.11, -12});
testArrowImport<float>("f", {-99.9, 4.3, 31.1, 129.11, -12});
void testImportScalar(bool alwaysHasNullsBuffer) {
testArrowImport<bool>("b", {}, alwaysHasNullsBuffer);
testArrowImport<bool>("b", {true}, alwaysHasNullsBuffer);
testArrowImport<bool>("b", {false}, alwaysHasNullsBuffer);
testArrowImport<bool>("b", {true, false, true}, alwaysHasNullsBuffer);
testArrowImport<bool>(
"b", {true, std::nullopt, true}, alwaysHasNullsBuffer);

testArrowImport<int8_t>("c", {}, alwaysHasNullsBuffer);
testArrowImport<int8_t>("c", {101}, alwaysHasNullsBuffer);
testArrowImport<int8_t>("c", {5, 4, 3, 1, 2}, alwaysHasNullsBuffer);
testArrowImport<int8_t>(
"c", {8, std::nullopt, std::nullopt}, alwaysHasNullsBuffer);
testArrowImport<int8_t>(
"c", {std::nullopt, std::nullopt}, alwaysHasNullsBuffer);

testArrowImport<int16_t>("s", {5, 4, 3, 1, 2}, alwaysHasNullsBuffer);
testArrowImport<int32_t>("i", {5, 4, 3, 1, 2}, alwaysHasNullsBuffer);

testArrowImport<int64_t>("l", {}, alwaysHasNullsBuffer);
testArrowImport<int64_t>("l", {std::nullopt}, alwaysHasNullsBuffer);
testArrowImport<int64_t>(
"l", {-99, 4, 318321631, 1211, -12}, alwaysHasNullsBuffer);
testArrowImport<int64_t>(
"l", {std::nullopt, 12345678, std::nullopt}, alwaysHasNullsBuffer);
testArrowImport<int64_t>(
"l", {std::nullopt, std::nullopt}, alwaysHasNullsBuffer);

testArrowImport<double>("g", {}, alwaysHasNullsBuffer);
testArrowImport<double>("g", {std::nullopt}, alwaysHasNullsBuffer);
testArrowImport<double>(
"g", {-99.9, 4.3, 31.1, 129.11, -12}, alwaysHasNullsBuffer);
testArrowImport<float>(
"f", {-99.9, 4.3, 31.1, 129.11, -12}, alwaysHasNullsBuffer);
}

void testImportString() {
testArrowImport<std::string>("u", {});
testArrowImport<std::string>("u", {"single"});
void testImportString(bool alwaysHasNullsBuffer) {
testArrowImport<std::string>("u", {}, alwaysHasNullsBuffer);
testArrowImport<std::string>("u", {"single"}, alwaysHasNullsBuffer);
testArrowImport<std::string>(
"u",
{
Expand All @@ -607,7 +622,8 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
"side",
std::nullopt,
std::nullopt,
});
},
alwaysHasNullsBuffer);

testArrowImport<std::string>(
"z",
Expand All @@ -619,7 +635,8 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
"varbinary",
"vector",
std::nullopt,
});
},
alwaysHasNullsBuffer);
}

void testImportFailures() {
Expand Down Expand Up @@ -702,11 +719,13 @@ class ArrowBridgeArrayImportAsViewerTest : public ArrowBridgeArrayImportTest {
};

TEST_F(ArrowBridgeArrayImportAsViewerTest, scalar) {
testImportScalar();
testImportScalar(false);
testImportScalar(true);
}

TEST_F(ArrowBridgeArrayImportAsViewerTest, string) {
testImportString();
testImportString(false);
testImportString(true);
}

TEST_F(ArrowBridgeArrayImportAsViewerTest, failures) {
Expand All @@ -725,11 +744,13 @@ class ArrowBridgeArrayImportAsOwnerTest
};

TEST_F(ArrowBridgeArrayImportAsOwnerTest, scalar) {
testImportScalar();
testImportScalar(false);
testImportScalar(true);
}

TEST_F(ArrowBridgeArrayImportAsOwnerTest, string) {
testImportString();
testImportString(false);
testImportString(true);
}

TEST_F(ArrowBridgeArrayImportAsOwnerTest, failures) {
Expand Down

0 comments on commit 23d1bd7

Please sign in to comment.