Skip to content

Commit

Permalink
ARROW-4571: [Format] Tensor.fbs file has multiple root_type declarations
Browse files Browse the repository at this point in the history
Author: Kenta Murata <[email protected]>

Closes apache#3651 from mrkn/separate_sparse_tensor_format and squashes the following commits:

760cefa <Kenta Murata> Add format/SparseTensor.fbs
1f92cfa <Kenta Murata> Separate SparseTensor.fbs from Tensor.fbs
  • Loading branch information
mrkn authored and wesm committed Feb 18, 2019
1 parent 240c469 commit 2df33de
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 95 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/ipc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ set(FBS_SRC
${ARROW_SOURCE_DIR}/../format/File.fbs
${ARROW_SOURCE_DIR}/../format/Schema.fbs
${ARROW_SOURCE_DIR}/../format/Tensor.fbs
${ARROW_SOURCE_DIR}/../format/SparseTensor.fbs
${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)

foreach(FIL ${FBS_SRC})
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/ipc/metadata-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
#include "arrow/io/interfaces.h"
#include "arrow/ipc/File_generated.h" // IWYU pragma: keep
#include "arrow/ipc/Message_generated.h"
#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/SparseTensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/message.h"
#include "arrow/ipc/util.h"
#include "arrow/sparse_tensor.h"
Expand Down
2 changes: 1 addition & 1 deletion docs/source/format/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Currently, the Arrow specification consists of these pieces:
- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
- Encapsulated Messages (see Message.fbs)
- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see :doc:`IPC`)
- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs and SparseTensor.fbs)

The metadata currently uses Google's `flatbuffers library`_ for serializing a
couple related pieces of information:
Expand Down
1 change: 1 addition & 0 deletions format/Message.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

include "Schema.fbs";
include "SparseTensor.fbs";
include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;
Expand Down
116 changes: 116 additions & 0 deletions format/SparseTensor.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors".
/// Arrow implementations in general are not required to implement this type

include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// EXPERIMENTAL: Data structures for sparse tensors

/// Coordinate (COO) format of sparse tensor index.
table SparseTensorIndexCOO {
/// COO's index list is represented as an NxM matrix,
/// where N is the number of non-zero values,
/// and M is the number of dimensions of a sparse tensor.
/// indicesBuffer stores the location and size of this index matrix.
/// The type of index value is long, so the stride for the index matrix is unnecessary.
///
/// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
///
/// X[0, 1, 2, 0] := 1
/// X[1, 1, 2, 3] := 2
/// X[0, 2, 1, 0] := 3
/// X[0, 1, 3, 0] := 4
/// X[0, 1, 2, 1] := 5
/// X[1, 2, 0, 4] := 6
///
/// In COO format, the index matrix of X is the following 4x6 matrix:
///
/// [[0, 0, 0, 0, 1, 1],
/// [1, 1, 1, 2, 1, 2],
/// [2, 2, 3, 1, 2, 0],
/// [0, 1, 0, 0, 3, 4]]
///
/// Each column of the matrix above holds the coordinates of one non-zero value.
/// Note that the indices are sorted in lexicographical order.
// NOTE(review): the description above says NxM (6x4 for this example), but the
// example matrix is written as 4x6 (MxN). Confirm which layout indicesBuffer
// actually stores and make the two statements agree.
indicesBuffer: Buffer;
}

/// Compressed Sparse Row (CSR) format, which is specific to matrices.
table SparseMatrixIndexCSR {
/// indptrBuffer stores the location and size of the indptr array that
/// represents the range of the rows.
/// The i-th row spans from indptr[i] (inclusive) to indptr[i+1] (exclusive)
/// in the data.
/// The length of this array is 1 + (the number of rows), and the type
/// of index value is long.
///
/// For example, let X be the following 6x4 matrix:
///
/// X := [[0, 1, 2, 0],
/// [0, 0, 3, 0],
/// [0, 4, 0, 5],
/// [0, 0, 0, 0],
/// [6, 0, 7, 8],
/// [0, 9, 0, 0]].
///
/// The array of non-zero values in X is:
///
/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
///
/// And the indptr of X is:
///
/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
///
/// The last entry equals the number of non-zero values (9), and the
/// repeated 5 marks the empty fourth row.
indptrBuffer: Buffer;

/// indicesBuffer stores the location and size of the array that
/// contains the column indices of the corresponding non-zero values.
/// The type of index value is long.
///
/// For example, the indices of the above X are:
///
/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
indicesBuffer: Buffer;
}

/// The index representation used by a sparse tensor: either the
/// coordinate (COO) index or the compressed sparse row (CSR) index.
// Member order determines the generated union type codes; append new
// index formats at the end rather than reordering.
union SparseTensorIndex {
SparseTensorIndexCOO,
SparseMatrixIndexCSR
}

/// EXPERIMENTAL: Metadata for a sparse tensor: its value type, shape,
/// number of non-zero values, index, and the location of its data.
table SparseTensor {
/// The type of data contained in a value cell.
/// Currently only fixed-width value types are supported,
/// no strings or nested types.
type: Type;

/// The dimensions of the tensor, optionally named.
shape: [TensorDim];

/// The number of non-zero values in a sparse tensor.
non_zero_length: long;

/// Sparse tensor index, in one of the SparseTensorIndex formats.
sparseIndex: SparseTensorIndex;

/// The location and size of the tensor's data.
data: Buffer;
}

root_type SparseTensor;
93 changes: 0 additions & 93 deletions format/Tensor.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -51,96 +51,3 @@ table Tensor {
}

root_type Tensor;

/// ----------------------------------------------------------------------
/// EXPERIMENTAL: Data structures for sparse tensors

/// Coordinate (COO) format of sparse tensor index.
table SparseTensorIndexCOO {
/// COO's index list is represented as an NxM matrix,
/// where N is the number of non-zero values,
/// and M is the number of dimensions of a sparse tensor.
/// indicesBuffer stores the location and size of this index matrix.
/// The type of index value is long, so the stride for the index matrix is unnecessary.
///
/// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
///
/// X[0, 1, 2, 0] := 1
/// X[1, 1, 2, 3] := 2
/// X[0, 2, 1, 0] := 3
/// X[0, 1, 3, 0] := 4
/// X[0, 1, 2, 1] := 5
/// X[1, 2, 0, 4] := 6
///
/// In COO format, the index matrix of X is the following 4x6 matrix:
///
/// [[0, 0, 0, 0, 1, 1],
/// [1, 1, 1, 2, 1, 2],
/// [2, 2, 3, 1, 2, 0],
/// [0, 1, 0, 0, 3, 4]]
///
/// Each column of the matrix above holds the coordinates of one non-zero value.
/// Note that the indices are sorted in lexicographical order.
// NOTE(review): the description above says NxM (6x4 for this example), but the
// example matrix is written as 4x6 (MxN). Confirm which layout indicesBuffer
// actually stores and make the two statements agree.
indicesBuffer: Buffer;
}

/// Compressed Sparse Row (CSR) format, which is specific to matrices.
table SparseMatrixIndexCSR {
/// indptrBuffer stores the location and size of the indptr array that
/// represents the range of the rows.
/// The i-th row spans from indptr[i] (inclusive) to indptr[i+1] (exclusive)
/// in the data.
/// The length of this array is 1 + (the number of rows), and the type
/// of index value is long.
///
/// For example, let X be the following 6x4 matrix:
///
/// X := [[0, 1, 2, 0],
/// [0, 0, 3, 0],
/// [0, 4, 0, 5],
/// [0, 0, 0, 0],
/// [6, 0, 7, 8],
/// [0, 9, 0, 0]].
///
/// The array of non-zero values in X is:
///
/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
///
/// And the indptr of X is:
///
/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
///
/// The last entry equals the number of non-zero values (9), and the
/// repeated 5 marks the empty fourth row.
indptrBuffer: Buffer;

/// indicesBuffer stores the location and size of the array that
/// contains the column indices of the corresponding non-zero values.
/// The type of index value is long.
///
/// For example, the indices of the above X are:
///
/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
indicesBuffer: Buffer;
}

/// The index representation used by a sparse tensor: either the
/// coordinate (COO) index or the compressed sparse row (CSR) index.
// Member order determines the generated union type codes; append new
// index formats at the end rather than reordering.
union SparseTensorIndex {
SparseTensorIndexCOO,
SparseMatrixIndexCSR
}

/// EXPERIMENTAL: Metadata for a sparse tensor: its value type, shape,
/// number of non-zero values, index, and the location of its data.
table SparseTensor {
/// The type of data contained in a value cell.
/// Currently only fixed-width value types are supported,
/// no strings or nested types.
type: Type;

/// The dimensions of the tensor, optionally named.
shape: [TensorDim];

/// The number of non-zero values in a sparse tensor.
non_zero_length: long;

/// Sparse tensor index, in one of the SparseTensorIndex formats.
sparseIndex: SparseTensorIndex;

/// The location and size of the tensor's data.
data: Buffer;
}

root_type SparseTensor;
1 change: 1 addition & 0 deletions java/format/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
<argument>${flatc.generated.files}</argument>
<argument>../../format/Schema.fbs</argument>
<argument>../../format/Tensor.fbs</argument>
<argument>../../format/SparseTensor.fbs</argument>
<argument>../../format/File.fbs</argument>
<argument>../../format/Message.fbs</argument>
</arguments>
Expand Down

0 comments on commit 2df33de

Please sign in to comment.