Skip to content

Commit

Permalink
ARROW-4265: [C++] Automatic conversion between Table and std::vector<…
Browse files Browse the repository at this point in the history
…std::tuple<..>>

This enables conversions between a `std::vector<std::tuple<…>>`-like range and an `arrow::Table`.

tuple to Table:

```cpp
std::vector<std::tuple<double, std::string>> rows = ..
std::shared_ptr<Table> table;

if (!arrow::stl::TableFromTupleRange(
      arrow::default_memory_pool(),
      rows, names, &table).ok()
) {
  // Error handling code should go here.
}
```

Table to tuple:

```cpp
// An important aspect here is that the table columns need to be in the
// same order as the columns will later appear in the tuple. As the tuple
// is unnamed, matching is done on positions.
std::shared_ptr<Table> table = ..

// The range needs to be pre-sized to the number of rows in the table.
// This allows us to pass in an arbitrary range object, not only
// `std::vector`.
std::vector<std::tuple<double, std::string>> rows(2);
arrow::compute::FunctionContext ctx;
arrow::compute::CastOptions cast_options;
if (!arrow::stl::TupleRangeFromTable(*table, cast_options, &ctx, &rows).ok()) {
  // Error handling code should go here.
}
```

Author: Korn, Uwe <[email protected]>
Author: Uwe L. Korn <[email protected]>

Closes apache#3404 from xhochy/stl-extension and squashes the following commits:

4856260 <Korn, Uwe> Cast to size_t to compare on equal signedness
aaeacfd <Uwe L. Korn> docker-compose run clang-format
386e5bc <Korn, Uwe> Check size of target
8b472da <Korn, Uwe> Update documentation
1a3743e <Korn, Uwe> Allow building shared libs without tests
9a08a3e <Korn, Uwe> Use full path to checked_cast
e037507 <Korn, Uwe> Use ArrayFromJSON
1ab23f8 <Korn, Uwe> Move to type_singleton
30e66f9 <Korn, Uwe> Add additional STL conversions
  • Loading branch information
xhochy committed Feb 18, 2019
1 parent 811c7dc commit 240c469
Show file tree
Hide file tree
Showing 10 changed files with 561 additions and 25 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ pass ARROW_BUILD_SHARED=on")
# Use shared linking for unit tests if it's available
set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_SHARED_LINK_LIBS})
set(ARROW_EXAMPLE_LINK_LIBS arrow_shared)
else()
elseif(ARROW_BUILD_TESTS)
if(NOT ARROW_BUILD_STATIC)
message(FATAL_ERROR "If using static linkage for unit tests, must also \
pass ARROW_BUILD_STATIC=on")
Expand Down
169 changes: 156 additions & 13 deletions cpp/src/arrow/stl-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
#include <gtest/gtest.h>

#include "arrow/stl.h"
#include "arrow/table.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"

// Row type with one element per primitive type exercised by the tests below.
// The element order matches the "column1" .. "column10" fields of the schemas
// the tests build (int8 .. uint64, boolean, utf8).
using primitive_types_tuple = std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t,
uint16_t, uint32_t, uint64_t, bool, std::string>;

namespace arrow {
namespace stl {

Expand All @@ -36,12 +41,9 @@ TEST(TestSchemaFromTuple, PrimitiveTypesVector) {
field("column7", uint32(), false), field("column8", uint64(), false),
field("column9", boolean(), false), field("column10", utf8(), false)});

std::shared_ptr<Schema> schema =
SchemaFromTuple<std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, bool, std::string>>::
MakeSchema(std::vector<std::string>({"column1", "column2", "column3", "column4",
"column5", "column6", "column7", "column8",
"column9", "column10"}));
std::shared_ptr<Schema> schema = SchemaFromTuple<primitive_types_tuple>::MakeSchema(
std::vector<std::string>({"column1", "column2", "column3", "column4", "column5",
"column6", "column7", "column8", "column9", "column10"}));
ASSERT_TRUE(expected_schema.Equals(*schema));
}

Expand All @@ -53,13 +55,9 @@ TEST(TestSchemaFromTuple, PrimitiveTypesTuple) {
field("column7", uint32(), false), field("column8", uint64(), false),
field("column9", boolean(), false), field("column10", utf8(), false)});

std::shared_ptr<Schema> schema = SchemaFromTuple<
std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t,
bool, std::string>>::MakeSchema(std::make_tuple("column1", "column2",
"column3", "column4",
"column5", "column6",
"column7", "column8",
"column9", "column10"));
std::shared_ptr<Schema> schema = SchemaFromTuple<primitive_types_tuple>::MakeSchema(
std::make_tuple("column1", "column2", "column3", "column4", "column5", "column6",
"column7", "column8", "column9", "column10"));
ASSERT_TRUE(expected_schema.Equals(*schema));
}

Expand All @@ -80,5 +78,150 @@ TEST(TestSchemaFromTuple, NestedList) {
ASSERT_TRUE(expected_schema.Equals(*schema));
}

// Converts two rows of primitive_types_tuple into a Table and checks the
// result against a reference table assembled column by column.
TEST(TestTableFromTupleVector, PrimitiveTypes) {
  // Reference schema: one non-nullable field per tuple element.
  std::shared_ptr<Schema> reference_schema =
      schema({field("column1", int8(), false), field("column2", int16(), false),
              field("column3", int32(), false), field("column4", int64(), false),
              field("column5", uint8(), false), field("column6", uint16(), false),
              field("column7", uint32(), false), field("column8", uint64(), false),
              field("column9", boolean(), false), field("column10", utf8(), false)});

  // Reference columns, listed in the same order as the schema fields.
  std::vector<std::shared_ptr<Array>> reference_columns = {
      ArrayFromJSON(int8(), "[-1, -10]"),
      ArrayFromJSON(int16(), "[-2, -20]"),
      ArrayFromJSON(int32(), "[-3, -30]"),
      ArrayFromJSON(int64(), "[-4, -40]"),
      ArrayFromJSON(uint8(), "[1, 10]"),
      ArrayFromJSON(uint16(), "[2, 20]"),
      ArrayFromJSON(uint32(), "[3, 30]"),
      ArrayFromJSON(uint64(), "[4, 40]"),
      ArrayFromJSON(boolean(), "[true, false]"),
      ArrayFromJSON(utf8(), R"(["Tests", "Other"])")};
  std::shared_ptr<Table> reference_table =
      Table::Make(reference_schema, reference_columns);

  // Input under test: two rows plus the column names to attach to them.
  std::vector<primitive_types_tuple> input_rows{
      primitive_types_tuple(-1, -2, -3, -4, 1, 2, 3, 4, true, "Tests"),
      primitive_types_tuple(-10, -20, -30, -40, 10, 20, 30, 40, false, "Other")};
  std::vector<std::string> column_names{"column1", "column2", "column3", "column4",
                                        "column5", "column6", "column7", "column8",
                                        "column9", "column10"};

  std::shared_ptr<Table> converted;
  ASSERT_OK(
      TableFromTupleRange(default_memory_pool(), input_rows, column_names, &converted));

  ASSERT_TRUE(reference_table->Equals(*converted));
}

// Checks that a tuple element of type std::vector<int64_t> is converted into a
// list(int64) column when building a Table from a range of tuples.
TEST(TestTableFromTupleVector, ListType) {
  using tuple_type = std::tuple<std::vector<int64_t>>;

  // Build the schema through the schema() factory instead of a raw `new`.
  std::shared_ptr<Schema> expected_schema =
      schema({field("column1", list(int64()), false)});
  std::shared_ptr<Array> expected_array =
      ArrayFromJSON(list(int64()), "[[1, 1, 2, 34], [2, -4]]");
  std::shared_ptr<Table> expected_table = Table::Make(expected_schema, {expected_array});

  // Two rows whose single element holds lists of different lengths.
  std::vector<tuple_type> rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
                               tuple_type(std::vector<int64_t>{2, -4})};
  std::vector<std::string> names{"column1"};

  std::shared_ptr<Table> table;
  ASSERT_OK(TableFromTupleRange(default_memory_pool(), rows, names, &table));
  ASSERT_TRUE(expected_table->Equals(*table));
}

// Converts a Table with one column per primitive type into a vector of
// primitive_types_tuple, then checks that mismatched row and column counts are
// rejected with Status::Invalid.
TEST(TestTupleVectorFromTable, PrimitiveTypes) {
  compute::FunctionContext ctx;
  compute::CastOptions cast_options;

  std::vector<primitive_types_tuple> expected_rows{
      primitive_types_tuple(-1, -2, -3, -4, 1, 2, 3, 4, true, "Tests"),
      primitive_types_tuple(-10, -20, -30, -40, 10, 20, 30, 40, false, "Other")};

  // Named `table_schema` so the local does not shadow the schema() factory,
  // which replaces the raw `new Schema(...)`.
  std::shared_ptr<Schema> table_schema =
      schema({field("column1", int8(), false), field("column2", int16(), false),
              field("column3", int32(), false), field("column4", int64(), false),
              field("column5", uint8(), false), field("column6", uint16(), false),
              field("column7", uint32(), false), field("column8", uint64(), false),
              field("column9", boolean(), false), field("column10", utf8(), false)});

  // Construct the input columns; values mirror `expected_rows` column-wise.
  std::shared_ptr<Array> int8_array = ArrayFromJSON(int8(), "[-1, -10]");
  std::shared_ptr<Array> int16_array = ArrayFromJSON(int16(), "[-2, -20]");
  std::shared_ptr<Array> int32_array = ArrayFromJSON(int32(), "[-3, -30]");
  std::shared_ptr<Array> int64_array = ArrayFromJSON(int64(), "[-4, -40]");
  std::shared_ptr<Array> uint8_array = ArrayFromJSON(uint8(), "[1, 10]");
  std::shared_ptr<Array> uint16_array = ArrayFromJSON(uint16(), "[2, 20]");
  std::shared_ptr<Array> uint32_array = ArrayFromJSON(uint32(), "[3, 30]");
  std::shared_ptr<Array> uint64_array = ArrayFromJSON(uint64(), "[4, 40]");
  std::shared_ptr<Array> bool_array = ArrayFromJSON(boolean(), "[true, false]");
  std::shared_ptr<Array> string_array = ArrayFromJSON(utf8(), R"(["Tests", "Other"])");
  auto table = Table::Make(
      table_schema, {int8_array, int16_array, int32_array, int64_array, uint8_array,
                     uint16_array, uint32_array, uint64_array, bool_array, string_array});

  // The target range is sized up front to the table's row count.
  std::vector<primitive_types_tuple> rows(2);
  ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
  ASSERT_EQ(rows, expected_rows);

  // The number of rows must match
  std::vector<primitive_types_tuple> too_few_rows(1);
  ASSERT_RAISES(Invalid, TupleRangeFromTable(*table, cast_options, &ctx, &too_few_rows));

  // The number of columns must match
  std::shared_ptr<Table> corrupt_table;
  ASSERT_OK(table->RemoveColumn(0, &corrupt_table));
  ASSERT_RAISES(Invalid, TupleRangeFromTable(*corrupt_table, cast_options, &ctx, &rows));
}

// Checks that a list(int64) column is converted into a std::vector<int64_t>
// tuple element when reading a Table back into a range of tuples.
TEST(TestTupleVectorFromTable, ListType) {
  using tuple_type = std::tuple<std::vector<int64_t>>;

  compute::FunctionContext ctx;
  compute::CastOptions cast_options;

  // Input table: a single non-nullable list(int64) column with two rows.
  std::shared_ptr<Schema> input_schema =
      schema({field("column1", list(int64()), false)});
  std::shared_ptr<Array> input_array =
      ArrayFromJSON(list(int64()), "[[1, 1, 2, 34], [2, -4]]");
  std::shared_ptr<Table> table = Table::Make(input_schema, {input_array});

  std::vector<tuple_type> expected_rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
                                        tuple_type(std::vector<int64_t>{2, -4})};

  // The target range is sized up front to the table's row count.
  std::vector<tuple_type> rows(2);
  ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
  ASSERT_EQ(rows, expected_rows);
}

// Checks that a column whose type differs from the tuple element type is still
// converted: the table stores list(int16) while the tuple expects
// std::vector<int64_t>.
TEST(TestTupleVectorFromTable, CastingNeeded) {
  using tuple_type = std::tuple<std::vector<int64_t>>;

  compute::FunctionContext ctx;
  compute::CastOptions cast_options;

  // Input table deliberately uses int16 list values rather than int64.
  std::shared_ptr<Schema> input_schema =
      schema({field("column1", list(int16()), false)});
  std::shared_ptr<Array> input_array =
      ArrayFromJSON(list(int16()), "[[1, 1, 2, 34], [2, -4]]");
  std::shared_ptr<Table> table = Table::Make(input_schema, {input_array});

  std::vector<tuple_type> expected_rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
                                        tuple_type(std::vector<int64_t>{2, -4})};

  // The target range is sized up front to the table's row count.
  std::vector<tuple_type> rows(2);
  ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
  ASSERT_EQ(rows, expected_rows);
}

} // namespace stl
} // namespace arrow
Loading

0 comments on commit 240c469

Please sign in to comment.