Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
chenhao-db committed Mar 25, 2024
1 parent b2f6474 commit 19f8838
Show file tree
Hide file tree
Showing 10 changed files with 938 additions and 5 deletions.
13 changes: 13 additions & 0 deletions common/utils/src/main/resources/error/error-classes.json
Original file line number Diff line number Diff line change
Expand Up @@ -2762,6 +2762,19 @@
],
"sqlState" : "42K09"
},
"INVALID_VARIANT_CAST" : {
"message" : [
"The variant value `<value>` cannot be cast into `<dataType>`. Please use `try_variant_get` instead."
],
"sqlState" : "22023"
},
"INVALID_VARIANT_GET_PATH" : {
"message" : [
"The path `<path>` is not a valid variant extraction path in `<functionName>`.",
"A valid path should start with `$` and is followed by zero or more segments like `[123]`, `.name`, `['name']`, or `[\"name\"]`."
],
"sqlState" : "22023"
},
"INVALID_VIEW_TEXT" : {
"message" : [
"The view <viewName> cannot be displayed due to invalid view text: <viewText>. This may be caused by an unauthorized modification of the view or an incorrect query syntax. Please check your query syntax and verify that the view has not been tampered with."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import java.io.CharArrayWriter;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Arrays;

import static org.apache.spark.types.variant.VariantUtil.*;

Expand All @@ -32,10 +34,19 @@
public final class Variant {
private final byte[] value;
private final byte[] metadata;
// The variant value doesn't use the whole `value` binary, but starts from its `pos` index and
// spans a size of `valueSize(value, pos)`. This design avoids frequent copies of the value binary
// when reading a sub-variant in the array/object element.
private final int pos;

// Build a variant whose value starts at index 0 of `value`.
// Delegates to the three-argument constructor with `pos` set to 0.
public Variant(byte[] value, byte[] metadata) {
this(value, metadata, 0);
}

Variant(byte[] value, byte[] metadata, int pos) {
this.value = value;
this.metadata = metadata;
this.pos = pos;
// There is currently only one allowed version.
if (metadata.length < 1 || (metadata[0] & VERSION_MASK) != VERSION) {
throw malformedVariant();
Expand All @@ -48,18 +59,138 @@ public Variant(byte[] value, byte[] metadata) {
}

// Return the binary of this variant value.
// When `pos` is 0 the backing `value` array is returned as is (no copy). Otherwise only the
// `valueSize(value, pos)` bytes starting at `pos` are copied into a new array, so a sub-variant
// does not expose the rest of the enclosing binary.
// Throw `MALFORMED_VARIANT` if the variant is malformed.
public byte[] getValue() {
  if (pos == 0) return value;
  int size = valueSize(value, pos);
  // The last byte of the computed span must still lie inside the backing array.
  checkIndex(pos + size - 1, value.length);
  return Arrays.copyOfRange(value, pos, pos + size);
}

// Return the metadata binary held by this variant. The array is returned directly, not copied.
public byte[] getMetadata() {
return metadata;
}

// Get a boolean value from the variant.
// Delegates to `VariantUtil.getBoolean`, reading at this variant's `pos` offset within `value`.
// NOTE(review): presumably only valid when `getType()` reports a boolean type - confirm against
// `VariantUtil.getBoolean`.
public boolean getBoolean() {
return VariantUtil.getBoolean(value, pos);
}

// Get a long value from the variant.
// Delegates to `VariantUtil.getLong`, reading at this variant's `pos` offset within `value`.
// NOTE(review): presumably only valid when `getType()` reports an integral type - confirm
// against `VariantUtil.getLong`.
public long getLong() {
return VariantUtil.getLong(value, pos);
}

// Get a double value from the variant.
// Delegates to `VariantUtil.getDouble`, reading at this variant's `pos` offset within `value`.
// NOTE(review): presumably only valid when `getType()` reports a double type - confirm against
// `VariantUtil.getDouble`.
public double getDouble() {
return VariantUtil.getDouble(value, pos);
}

// Get a decimal value from the variant.
// Delegates to `VariantUtil.getDecimal`, reading at this variant's `pos` offset within `value`.
// NOTE(review): presumably only valid when `getType()` reports a decimal type - confirm against
// `VariantUtil.getDecimal`.
public BigDecimal getDecimal() {
return VariantUtil.getDecimal(value, pos);
}

// Get a string value from the variant.
// Delegates to `VariantUtil.getString`, reading at this variant's `pos` offset within `value`.
// NOTE(review): presumably only valid when `getType()` reports a string type - confirm against
// `VariantUtil.getString`.
public String getString() {
return VariantUtil.getString(value, pos);
}

// Get the value type of the variant.
// Delegates to `VariantUtil.getType`, inspecting the value that starts at `pos` within `value`.
public Type getType() {
return VariantUtil.getType(value, pos);
}

// Return how many fields the object stored at `pos` contains.
// The count is the first value `handleObject` passes to its callback; every other layout
// parameter is ignored here.
// It is only legal to call it when `getType()` is `Type.OBJECT`.
public int objectSize() {
  return handleObject(
      value,
      pos,
      (numFields, idWidth, offsetWidth, idBase, offsetBase, dataBase) -> numFields);
}

// Look up the object field named `key` and return its value. Return null if no field has that
// key.
// Objects with fewer than 32 fields are scanned front to back; larger objects are binary
// searched by comparing each probed field's key against `key`.
// NOTE(review): the binary search presumably relies on the fields being stored in key order -
// confirm against the code that writes objects.
// It is only legal to call it when `getType()` is `Type.OBJECT`.
public Variant getFieldByKey(String key) {
  return handleObject(value, pos,
      (numFields, idWidth, offsetWidth, idBase, offsetBase, dataBase) -> {
        // Field counts below this limit use a linear scan; at or above it, binary search.
        final int binarySearchThreshold = 32;
        // Slot of the matching field, or -1 while nothing has matched.
        int match = -1;
        if (numFields >= binarySearchThreshold) {
          int lo = 0;
          int hi = numFields - 1;
          while (match < 0 && lo <= hi) {
            // The unsigned shift keeps the midpoint correct even when `lo + hi` overflows int.
            int probe = (lo + hi) >>> 1;
            int fieldId = readUnsigned(value, idBase + idWidth * probe, idWidth);
            int order = getMetadataKey(metadata, fieldId).compareTo(key);
            if (order == 0) {
              match = probe;
            } else if (order < 0) {
              lo = probe + 1;
            } else {
              hi = probe - 1;
            }
          }
        } else {
          for (int slot = 0; match < 0 && slot < numFields; ++slot) {
            int fieldId = readUnsigned(value, idBase + idWidth * slot, idWidth);
            if (key.equals(getMetadataKey(metadata, fieldId))) {
              match = slot;
            }
          }
        }
        if (match < 0) return null;
        // The field's value starts `fieldOffset` bytes into the object's data section.
        int fieldOffset = readUnsigned(value, offsetBase + offsetWidth * match, offsetWidth);
        return new Variant(value, metadata, dataBase + fieldOffset);
      });
}

// A key/value pair describing one object field, as returned by `getFieldAtIndex`.
// Both members are public and final.
public static final class ObjectField {
// The field's key.
public final String key;
// The field's value, as a variant.
public final Variant value;

// Store `key` and `value` as given; no validation or copying is performed.
public ObjectField(String key, Variant value) {
this.key = key;
this.value = value;
}
}

// Return the key and value of the object field stored in slot `index`. Return null if `index`
// lies outside `[0, objectSize())`.
// The key is resolved from the metadata through the field's id; the value is a sub-variant that
// shares this variant's `value` and `metadata` arrays.
// It is only legal to call it when `getType()` is `Type.OBJECT`.
public ObjectField getFieldAtIndex(int index) {
  return handleObject(value, pos,
      (numFields, idWidth, offsetWidth, idBase, offsetBase, dataBase) -> {
        boolean inBounds = 0 <= index && index < numFields;
        if (!inBounds) return null;
        int fieldId = readUnsigned(value, idBase + idWidth * index, idWidth);
        int fieldOffset = readUnsigned(value, offsetBase + offsetWidth * index, offsetWidth);
        return new ObjectField(
            getMetadataKey(metadata, fieldId),
            new Variant(value, metadata, dataBase + fieldOffset));
      });
}

// Return how many elements the array stored at `pos` contains.
// The count is the first value `handleArray` passes to its callback; the remaining layout
// parameters are ignored here.
// It is only legal to call it when `getType()` is `Type.ARRAY`.
public int arraySize() {
  return handleArray(
      value,
      pos,
      (numElements, offsetWidth, offsetBase, dataBase) -> numElements);
}

// Return the array element stored in slot `index`. Return null if `index` lies outside
// `[0, arraySize())`.
// The result is a sub-variant that shares this variant's `value` and `metadata` arrays and
// only differs in its start position.
// It is only legal to call it when `getType()` is `Type.ARRAY`.
public Variant getElementAtIndex(int index) {
  return handleArray(value, pos, (numElements, offsetWidth, offsetBase, dataBase) -> {
    boolean inBounds = 0 <= index && index < numElements;
    if (!inBounds) return null;
    int elementOffset = readUnsigned(value, offsetBase + offsetWidth * index, offsetWidth);
    return new Variant(value, metadata, dataBase + elementOffset);
  });
}

// Stringify the variant in JSON format.
// Serialization starts at `pos`, so a sub-variant obtained from an object field or an array
// element prints only its own value rather than the value at index 0 of the backing array.
// Throw `MALFORMED_VARIANT` if the variant is malformed.
public String toJson() {
  StringBuilder sb = new StringBuilder();
  toJsonImpl(value, metadata, pos, sb);
  return sb.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,52 @@ public static Type getType(byte[] value, int pos) {
}
}

// Return the number of bytes occupied by the variant value that begins at `value[pos]`.
// `value.length - pos` bounds the result from above, but the value may end earlier.
// Short strings, objects and arrays derive their size from the header byte or from their
// offset list; every other basic type is sized by its type info.
// Throw `MALFORMED_VARIANT` if the variant is malformed.
public static int valueSize(byte[] value, int pos) {
  checkIndex(pos, value.length);
  final byte header = value[pos];
  final int basicType = header & BASIC_TYPE_MASK;
  final int typeInfo = (header >> BASIC_TYPE_BITS) & TYPE_INFO_MASK;

  if (basicType == SHORT_STR) {
    // One header byte plus `typeInfo` further bytes.
    return 1 + typeInfo;
  }
  if (basicType == OBJECT) {
    // Bytes in front of the data section, plus the offset entry read at slot `count`, which is
    // one slot past the last field.
    return handleObject(value, pos,
        (count, idWidth, offsetWidth, idBase, offsetBase, dataBase) ->
            dataBase - pos + readUnsigned(value, offsetBase + count * offsetWidth, offsetWidth));
  }
  if (basicType == ARRAY) {
    // Same computation as for objects, using the array's offset list.
    return handleArray(value, pos, (count, offsetWidth, offsetBase, dataBase) ->
        dataBase - pos + readUnsigned(value, offsetBase + count * offsetWidth, offsetWidth));
  }

  // Any remaining basic type: the size depends only on the type info.
  switch (typeInfo) {
    case NULL:
    case TRUE:
    case FALSE:
      return 1;
    case INT1:
      return 2;
    case INT2:
      return 3;
    case INT4:
      return 5;
    case DECIMAL4:
      return 6;
    case INT8:
    case DOUBLE:
      return 9;
    case DECIMAL8:
      return 10;
    case DECIMAL16:
      return 18;
    case LONG_STR:
      // Header byte, a `U32_SIZE`-byte length field, then that many further bytes.
      return 1 + U32_SIZE + readUnsigned(value, pos + 1, U32_SIZE);
    default:
      throw malformedVariant();
  }
}

// Build, without throwing, the exception that reports a value was expected to be of `type`.
static IllegalStateException unexpectedType(Type type) {
  String message = "Expect type to be " + type;
  return new IllegalStateException(message);
}
Expand Down
13 changes: 13 additions & 0 deletions docs/sql-error-conditions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,19 @@ Invalid usage of `<elem>` in `<prettyName>`.

Variable type must be string type but got `<varType>`.

### INVALID_VARIANT_CAST

[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception)

The variant value ``<value>`` cannot be cast into ``<dataType>``. Please use `try_variant_get` instead.

### INVALID_VARIANT_GET_PATH

[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception)

The path ``<path>`` is not a valid variant extraction path in ``<functionName>``.
A valid path should start with `$` and is followed by zero or more segments like `[123]`, `.name`, `['name']`, or `["name"]`.

### INVALID_VIEW_TEXT

[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,8 @@ object FunctionRegistry {

// Variant
expression[ParseJson]("parse_json"),
expressionBuilder("variant_get", VariantGetExpressionBuilder),
expressionBuilder("try_variant_get", TryVariantGetExpressionBuilder),

// cast
expression[Cast]("cast"),
Expand Down
Loading

0 comments on commit 19f8838

Please sign in to comment.