Skip to content

Commit

Permalink
[nnpackage] Update circle schema (#13930)
Browse files Browse the repository at this point in the history
This commit updates circle schema to 0.9
- Introduce GGML_Q{X}_{Y} types for ggml block quantization
- Introduce weight compression type and option

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
  • Loading branch information
hseok-oh authored Sep 8, 2024
1 parent b60de1f commit 9213b12
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion nnpackage/schema/circle_schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
// Version 0.6: Base up to TensorFlow Lite v2.13.0 schema.
// Version 0.7: Base up to TensorFlow Lite v2.15.0 schema, deprecate data_format in Subgraph table
// Version 0.8: GRU op is added. UINT4 is added.
// Version 0.9: GGML_Q{X}_{Y} types are added. Weight compression option is added

namespace circle;

Expand All @@ -46,7 +47,6 @@ file_extension "circle";

// The type of data stored in a tensor.
enum TensorType : byte {
UINT4 = -1,
FLOAT32 = 0,
FLOAT16 = 1,
INT32 = 2,
Expand All @@ -68,6 +68,15 @@ enum TensorType : byte {
UINT32 = 15,
UINT16 = 16,
INT4 = 17,

// The values below are negative to represent TensorTypes that do not exist in the TensorFlow Lite schema

UINT4 = -1,
// GGML_Q{X}_{Y} types follow the ggml quantization spec (https://github.com/ggerganov/ggml)
GGML_Q4_0 = -2,
GGML_Q4_1 = -3,
GGML_Q8_0 = -4,
GGML_Q8_1 = -5,
}

// Custom quantization parameters for experimenting with new quantization
Expand Down Expand Up @@ -221,6 +230,11 @@ table VariantSubType {
has_rank: bool = false;
}

// Compression type applied to a tensor's data. Used as the type of
// Tensor.compression_type, whose default is NONE.
enum CompressionType : byte {
// No compression. This is the default value of Tensor.compression_type.
NONE = 0,
// NOTE(review): presumably Huffman-coded data; the exact encoding is not
// specified in this schema -- confirm against the runtime implementation.
HUFFMAN = 1
}

table Tensor {
// The tensor shape. The meaning of each entry is operator-specific but
// builtin ops use: [batch size, height, width, number of channels] (That's
Expand Down Expand Up @@ -262,6 +276,8 @@ table Tensor {
// Currently only 1 subtype is supported. The field is defined as an array for
// flexibility of supporting multiple subtypes in the future.
variant_tensors:[VariantSubType];

compression_type:CompressionType = NONE;
}

// A list of builtin operators. Builtin operators are slightly faster than custom
Expand Down

0 comments on commit 9213b12

Please sign in to comment.