Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37582: [Go][Parquet] Implement Float16 logical type #37599

Merged
merged 11 commits into from
Nov 13, 2023
46 changes: 41 additions & 5 deletions go/arrow/float16/float16.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package float16

import (
"encoding/binary"
"math"
"strconv"
)
Expand All @@ -29,6 +30,11 @@ type Num struct {
bits uint16
}

// Extreme finite half-precision values.
var (
	// MaxNum has the bit pattern 0x7bff (sign 0, exponent 0b11110, mantissa
	// all ones), i.e. the pattern immediately below positive infinity (0x7c00).
	MaxNum = Num{bits: 0x7bff}
	// MinNum is MaxNum passed through Negate.
	MinNum = MaxNum.Negate()
)
benibus marked this conversation as resolved.
Show resolved Hide resolved

// New creates a new half-precision floating point value from the provided
// float32 value.
func New(f float32) Num {
Expand Down Expand Up @@ -86,6 +92,11 @@ func (n Num) Div(rhs Num) Num {
return New(n.Float32() / rhs.Float32())
}

// Equal reports whether n and other compare equal after both have been
// converted to float32. The comparison uses Go's float32 == operator.
func (n Num) Equal(other Num) bool {
	lhs, rhs := n.Float32(), other.Float32()
	return lhs == rhs
}

// Greater returns true if the value represented by n is > other
func (n Num) Greater(other Num) bool {
return n.Float32() > other.Float32()
Expand Down Expand Up @@ -152,14 +163,39 @@ func (n Num) Abs() Num {
}

func (n Num) Sign() int {
f := n.Float32()
if f > 0 {
return 1
} else if f == 0 {
if n.IsZero() {
return 0
} else if n.Signbit() {
return -1
}
return -1
return 1
}

// Signbit reports whether the most significant bit (bit 15) of n is set.
// It is true for negative zero as well, since only the raw bit is inspected.
func (n Num) Signbit() bool {
	return n.bits>>15 == 1
}
benibus marked this conversation as resolved.
Show resolved Hide resolved

// IsNaN reports whether n is a NaN: all five exponent bits set and a
// non-zero mantissa. The sign bit is ignored.
func (n Num) IsNaN() bool {
	const (
		expMask  = 0x7c00 // exponent bits 10..14
		fracMask = 0x03ff // mantissa bits 0..9
	)
	return n.bits&expMask == expMask && n.bits&fracMask != 0
}

// IsZero reports whether every bit of n other than the sign bit is clear,
// so it is true for both positive and negative zero.
func (n Num) IsZero() bool {
	return n.bits&^0x8000 == 0
}

// Uint16 returns the raw 16-bit pattern stored in n.
func (n Num) Uint16() uint16 {
	return n.bits
}
// String formats n by converting it to float32 and rendering that value
// with strconv.FormatFloat using the 'g' format, the shortest precision
// that round-trips (-1), and a 32-bit source size.
func (n Num) String() string {
	widened := float64(n.Float32())
	return strconv.FormatFloat(widened, 'g', -1, 32)
}

// Inf returns the value with bit pattern 0x7c00: sign bit clear, all
// exponent bits set, and a zero mantissa.
func Inf() Num {
	return Num{bits: 0b0_11111_0000000000}
}

// NaN returns the value with bit pattern 0x7fff: sign bit clear, all
// exponent bits set, and all mantissa bits set.
func NaN() Num {
	return Num{bits: 0b0_11111_1111111111}
}

// FromBits wraps the raw 16-bit pattern src in a Num without any conversion.
func FromBits(src uint16) Num {
	var n Num
	n.bits = src
	return n
}

// FromLEBytes builds a Num from the first two bytes of src, interpreted as
// a little-endian uint16 bit pattern. The decoding is delegated to
// binary.LittleEndian.Uint16, which panics if src holds fewer than 2 bytes.
func FromLEBytes(src []byte) Num {
	raw := binary.LittleEndian.Uint16(src)
	return Num{bits: raw}
}

// PutLEBytes writes the bit pattern of n into the first two bytes of dst in
// little-endian order. The write is delegated to
// binary.LittleEndian.PutUint16, which panics if dst holds fewer than 2 bytes.
func (n Num) PutLEBytes(dst []byte) {
	order := binary.LittleEndian
	order.PutUint16(dst, n.bits)
}

// ToLEBytes returns a freshly allocated 2-byte slice holding the bit
// pattern of n in little-endian order.
func (n Num) ToLEBytes() []byte {
	var out [2]byte
	binary.LittleEndian.PutUint16(out[:], n.bits)
	return out[:]
}
43 changes: 43 additions & 0 deletions go/arrow/float16/float16_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ func TestSign(t *testing.T) {
}{
{Num{bits: 0x4580}, 1}, // 5.5
{Num{bits: 0x0000}, 0}, // 0
{Num{bits: 0x8000}, 0}, // -0
{Num{bits: 0xC580}, -1}, // -5.5
} {
t.Run("sign", func(t *testing.T) {
Expand All @@ -248,3 +249,45 @@ func TestSign(t *testing.T) {
})
}
}

// TestSignbit verifies that Signbit reflects only the raw sign bit,
// including for positive and negative zero.
func TestSignbit(t *testing.T) {
	cases := []struct {
		n    Num
		want bool
	}{
		{Num{bits: 0x4580}, false}, // 5.5
		{Num{bits: 0x0000}, false}, // 0
		{Num{bits: 0x8000}, true},  // -0
		{Num{bits: 0xC580}, true},  // -5.5
	}
	for _, tc := range cases {
		t.Run("signbit", func(t *testing.T) {
			if got := tc.n.Signbit(); got != tc.want {
				t.Fatalf("invalid value. got=%v, want=%v", got, tc.want)
			}
		})
	}
}

// TestIsNaN verifies that IsNaN is true for NaN bit patterns of either sign
// (minimal, quiet-bit-only, and all-ones mantissas) and false for
// infinities of either sign.
func TestIsNaN(t *testing.T) {
	cases := []struct {
		n    Num
		want bool
	}{
		{NaN(), true},
		{NaN().Negate(), true},
		{Inf(), false},
		{Inf().Negate(), false},
		{Num{bits: 0x7c01}, true}, // nan
		{Num{bits: 0xfc01}, true}, // -nan
		{Num{bits: 0x7e00}, true}, // nan
		{Num{bits: 0xfe00}, true}, // -nan
	}
	for _, tc := range cases {
		t.Run("isnan", func(t *testing.T) {
			if got := tc.n.IsNaN(); got != tc.want {
				t.Fatalf("invalid value. got=%v, want=%v", got, tc.want)
			}
		})
	}
}
13 changes: 11 additions & 2 deletions go/parquet/file/column_writer_types.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 22 additions & 6 deletions go/parquet/file/column_writer_types.gen.go.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package file

import (
"fmt"

"github.com/apache/arrow/go/v15/parquet"
"github.com/apache/arrow/go/v15/parquet/metadata"
"github.com/apache/arrow/go/v15/parquet/internal/encoding"
Expand Down Expand Up @@ -83,7 +83,7 @@ func (w *{{.Name}}ColumnChunkWriter) WriteBatch(values []{{.name}}, defLevels, r
// writes a large number of values, the DataPage size can be much above the limit.
// The purpose of this chunking is to bound this. Even if a user writes large number
// of values, the chunking will ensure the AddDataPage() is called at a reasonable
// pagesize limit
// pagesize limit
var n int64
switch {
case defLevels != nil:
Expand All @@ -107,7 +107,7 @@ func (w *{{.Name}}ColumnChunkWriter) WriteBatch(values []{{.name}}, defLevels, r
valueOffset += toWrite
w.checkDictionarySizeLimit()
})
return
return
}

// WriteBatchSpaced writes a batch of repetition levels, definition levels, and values to the
Expand All @@ -132,7 +132,7 @@ func (w *{{.Name}}ColumnChunkWriter) WriteBatchSpaced(values []{{.name}}, defLev
length = len(values)
}
doBatches(int64(length), w.props.WriteBatchSize(), func(offset, batch int64) {
var vals []{{.name}}
var vals []{{.name}}
info := w.maybeCalculateValidityBits(levelSliceOrNil(defLevels, offset, batch), batch)

w.writeLevelsSpaced(batch, levelSliceOrNil(defLevels, offset, batch), levelSliceOrNil(repLevels, offset, batch))
Expand Down Expand Up @@ -165,7 +165,7 @@ func (w *{{.Name}}ColumnChunkWriter) WriteDictIndices(indices arrow.Array, defLe
}
}
}()

valueOffset := int64(0)
length := len(defLevels)
if defLevels == nil {
Expand Down Expand Up @@ -193,14 +193,22 @@ func (w *{{.Name}}ColumnChunkWriter) WriteDictIndices(indices arrow.Array, defLe

valueOffset += info.numSpaced()
})

return
}

// writeValues passes values to the current encoder and, when page statistics
// are present, updates them with the same values and numNulls. In the
// FixedLenByteArray instantiation of this template the statistics update is
// routed to Float16Statistics when the column's logical type equals
// Float16LogicalType; every other instantiation updates its own
// type-specific statistics.
func (w *{{.Name}}ColumnChunkWriter) writeValues(values []{{.name}}, numNulls int64) {
w.currentEncoder.(encoding.{{.Name}}Encoder).Put(values)
if w.pageStatistics != nil {
{{- if ne .Name "FixedLenByteArray"}}
w.pageStatistics.(*metadata.{{.Name}}Statistics).Update(values, numNulls)
{{- else}}
if w.Descr().LogicalType().Equals(schema.Float16LogicalType{}) {
w.pageStatistics.(*metadata.Float16Statistics).Update(values, numNulls)
} else {
w.pageStatistics.(*metadata.{{.Name}}Statistics).Update(values, numNulls)
}
{{- end}}
}
}

Expand All @@ -212,7 +220,15 @@ func (w *{{.Name}}ColumnChunkWriter) writeValuesSpaced(spacedValues []{{.name}},
}
if w.pageStatistics != nil {
nulls := numValues - numRead
{{- if ne .Name "FixedLenByteArray"}}
w.pageStatistics.(*metadata.{{.Name}}Statistics).UpdateSpaced(spacedValues, validBits, validBitsOffset, nulls)
{{- else}}
if w.Descr().LogicalType().Equals(schema.Float16LogicalType{}) {
w.pageStatistics.(*metadata.Float16Statistics).UpdateSpaced(spacedValues, validBits, validBitsOffset, nulls)
} else {
w.pageStatistics.(*metadata.{{.Name}}Statistics).UpdateSpaced(spacedValues, validBits, validBitsOffset, nulls)
}
{{- end}}
}
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion go/parquet/internal/gen-go/parquet/parquet-consts.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading