From 8ca973ea88117780311a8c92e7b5b224e4dc8d50 Mon Sep 17 00:00:00 2001 From: Chenhao Li Date: Tue, 9 Apr 2024 10:42:20 -0700 Subject: [PATCH] review feedback --- .../apache/spark/types/variant/Variant.java | 32 +++--------- .../spark/types/variant/VariantUtil.java | 51 ++++--------------- .../variant/variantExpressions.scala | 6 +-- 3 files changed, 21 insertions(+), 68 deletions(-) diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/Variant.java b/common/variant/src/main/java/org/apache/spark/types/variant/Variant.java index 69f2ca90ba24d..4aeb2c6e14355 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/Variant.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/Variant.java @@ -29,6 +29,7 @@ import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.Base64; import java.util.Locale; @@ -97,27 +98,6 @@ public BigDecimal getDecimal() { return VariantUtil.getDecimal(value, pos); } - // Get a raw date value (number of days from the Unix epoch) from the variant. - public int getRawDate() { - return VariantUtil.getRawDate(value, pos); - } - - // Get a date value from the variant. - public LocalDate getDate() { - return VariantUtil.getDate(value, pos); - } - - // Get a raw timestamp/timestamp_ntz value (number of microseconds from the Unix epoch) from the - // variant - public long getRawTimestamp() { - return VariantUtil.getRawTimestamp(value, pos); - } - - // Get a timestamp/timestamp_ntz value from the variant. - public Instant getTimestamp() { - return VariantUtil.getTimestamp(value, pos); - } - // Get a float value from the variant. public float getFloat() { return VariantUtil.getFloat(value, pos); @@ -266,6 +246,10 @@ static void appendQuoted(StringBuilder sb, String str) { .appendOffset("+HH:MM", "+00:00") .toFormatter(Locale.US); + private static Instant microsToInstant(long timestamp) { + return Instant.EPOCH.plus(timestamp, ChronoUnit.MICROS); + } + static void toJsonImpl(byte[] value, byte[] metadata, int pos, StringBuilder sb, ZoneId zoneId) { switch (VariantUtil.getType(value, pos)) { case OBJECT: @@ -316,15 +300,15 @@ static void toJsonImpl(byte[] value, byte[] metadata, int pos, StringBuilder sb, sb.append(VariantUtil.getDecimal(value, pos).toPlainString()); break; case DATE: - appendQuoted(sb, VariantUtil.getDate(value, pos).toString()); + appendQuoted(sb, LocalDate.ofEpochDay((int) VariantUtil.getLong(value, pos)).toString()); break; case TIMESTAMP: appendQuoted(sb, TIMESTAMP_FORMATTER.format( - VariantUtil.getTimestamp(value, pos).atZone(zoneId))); + microsToInstant(VariantUtil.getLong(value, pos)).atZone(zoneId))); break; case TIMESTAMP_NTZ: appendQuoted(sb, TIMESTAMP_NTZ_FORMATTER.format( - VariantUtil.getTimestamp(value, pos).atZone(ZoneOffset.UTC))); + microsToInstant(VariantUtil.getLong(value, pos)).atZone(ZoneOffset.UTC))); break; case FLOAT: sb.append(VariantUtil.getFloat(value, pos)); diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java index 0e223659e8077..e4e9cc8b4cfac 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java @@ -23,9 +23,6 @@ import java.math.BigDecimal; import java.math.BigInteger; -import java.time.Instant; -import java.time.LocalDate; -import java.time.temporal.ChronoUnit; import java.util.Arrays; /** @@ -357,23 +354,31 @@ public static boolean getBoolean(byte[] value, int pos) { } // Get a long value from variant value `value[pos...]`. + // It is only legal to call it if `getType` returns one of `Type.LONG/DATE/TIMESTAMP/ + // TIMESTAMP_NTZ`. If the type is `DATE`, the return value is guaranteed to fit into an int and + // represents the number of days from the Unix epoch. If the type is `TIMESTAMP/TIMESTAMP_NTZ`, + // the return value represents the number of microseconds from the Unix epoch. // Throw `MALFORMED_VARIANT` if the variant is malformed. public static long getLong(byte[] value, int pos) { checkIndex(pos, value.length); int basicType = value[pos] & BASIC_TYPE_MASK; int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK; - if (basicType != PRIMITIVE) throw unexpectedType(Type.LONG); + String exceptionMessage = "Expect type to be LONG/DATE/TIMESTAMP/TIMESTAMP_NTZ"; + if (basicType != PRIMITIVE) throw new IllegalStateException(exceptionMessage); switch (typeInfo) { case INT1: return readLong(value, pos + 1, 1); case INT2: return readLong(value, pos + 1, 2); case INT4: + case DATE: return readLong(value, pos + 1, 4); case INT8: + case TIMESTAMP: + case TIMESTAMP_NTZ: return readLong(value, pos + 1, 8); default: - throw unexpectedType(Type.LONG); + throw new IllegalStateException(exceptionMessage); } } @@ -419,42 +424,6 @@ public static BigDecimal getDecimal(byte[] value, int pos) { return result.stripTrailingZeros(); } - // Get a raw date value (number of days from the Unix epoch) from variant value `value[pos...]`. - // Throw `MALFORMED_VARIANT` if the variant is malformed. - public static int getRawDate(byte[] value, int pos) { - checkIndex(pos, value.length); - int basicType = value[pos] & BASIC_TYPE_MASK; - int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK; - if (basicType != PRIMITIVE || typeInfo != DATE) throw unexpectedType(Type.DATE); - return (int) readLong(value, pos + 1, 4); - } - - // Get a date value from variant value `value[pos...]`. - // Throw `MALFORMED_VARIANT` if the variant is malformed. - public static LocalDate getDate(byte[] value, int pos) { - return LocalDate.ofEpochDay(getRawDate(value, pos)); - } - - // Get a raw timestamp/timestamp_ntz value (number of microseconds from the Unix epoch) from - // variant value `value[pos...]`. The variant library doesn't distinguish the content of these two - // types. Instead, the user interprets them differently. - // Throw `MALFORMED_VARIANT` if the variant is malformed. - public static long getRawTimestamp(byte[] value, int pos) { - checkIndex(pos, value.length); - int basicType = value[pos] & BASIC_TYPE_MASK; - int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK; - if (basicType != PRIMITIVE || (typeInfo != TIMESTAMP && typeInfo != TIMESTAMP_NTZ)) { - throw new IllegalStateException("Expect type to be TIMESTAMP/TIMESTAMP_NTZ"); - } - return readLong(value, pos + 1, 8); - } - - // Get a timestamp/timestamp_ntz value from variant value `value[pos...]`. - // Throw `MALFORMED_VARIANT` if the variant is malformed. - public static Instant getTimestamp(byte[] value, int pos) { - return Instant.EPOCH.plus(getRawTimestamp(value, pos), ChronoUnit.MICROS); - } - // Get a float value from variant value `value[pos...]`. // Throw `MALFORMED_VARIANT` if the variant is malformed. public static float getFloat(byte[] value, int pos) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala index 135e528d02a2b..abf3df2a0f6d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala @@ -299,9 +299,9 @@ case object VariantGet { case Type.DECIMAL => val d = Decimal(v.getDecimal) Literal(Decimal(v.getDecimal), DecimalType(d.precision, d.scale)) - case Type.DATE => Literal(v.getRawDate, DateType) - case Type.TIMESTAMP => Literal(v.getRawTimestamp, TimestampType) - case Type.TIMESTAMP_NTZ => Literal(v.getRawTimestamp, TimestampNTZType) + case Type.DATE => Literal(v.getLong.toInt, DateType) + case Type.TIMESTAMP => Literal(v.getLong, TimestampType) + case Type.TIMESTAMP_NTZ => Literal(v.getLong, TimestampNTZType) case Type.FLOAT => Literal(v.getFloat, FloatType) case Type.BINARY => Literal(v.getBinary, BinaryType) // We have handled other cases and should never reach here. This case is only intended