diff --git a/go/arrow/float16/float16.go b/go/arrow/float16/float16.go index 184cf9d153287..ac55a4c0318ee 100644 --- a/go/arrow/float16/float16.go +++ b/go/arrow/float16/float16.go @@ -179,6 +179,8 @@ func (n Num) IsNaN() bool { return (n.bits & 0x7fff) > 0x7c00 } func (f Num) Uint16() uint16 { return f.bits } func (f Num) String() string { return strconv.FormatFloat(float64(f.Float32()), 'g', -1, 32) } +func FromBits(src uint16) Num { return Num{bits: src} } + func FromLEBytes(src []byte) Num { return Num{bits: binary.LittleEndian.Uint16(src)} } diff --git a/go/parquet/internal/testutils/random.go b/go/parquet/internal/testutils/random.go index 2c8a2809dc784..6408803f0bb02 100644 --- a/go/parquet/internal/testutils/random.go +++ b/go/parquet/internal/testutils/random.go @@ -28,6 +28,7 @@ import ( "github.com/apache/arrow/go/v14/arrow/array" "github.com/apache/arrow/go/v14/arrow/bitutil" "github.com/apache/arrow/go/v14/arrow/endian" + "github.com/apache/arrow/go/v14/arrow/float16" "github.com/apache/arrow/go/v14/arrow/memory" "github.com/apache/arrow/go/v14/parquet" "github.com/apache/arrow/go/v14/parquet/pqarrow" @@ -369,6 +370,18 @@ func randFloat64(r *rand.Rand) float64 { } } +// randFloat16 builds a float16 from a random 16-bit pattern, retrying until the +// decoded value is neither NaN nor Inf so that no NaN or Inf values are returned. +func randFloat16(r *rand.Rand) float16.Num { + for { + f16 := float16.FromBits(uint16(r.Uint64n(math.MaxUint16 + 1))) + f64 := float64(f16.Float32()) + if !math.IsNaN(f64) && !math.IsInf(f64, 0) { + return f16 + } + } +} + // FillRandomFloat32 populates out with random float32 values using seed as the random // seed for the generator to allow consistency for testing. 
func FillRandomFloat32(seed uint64, out []float32) { @@ -387,6 +400,15 @@ func FillRandomFloat64(seed uint64, out []float64) { } } +// FillRandomFloat16 populates out with random float16 values using seed as the random +// seed for the generator to allow consistency for testing. +func FillRandomFloat16(seed uint64, out []float16.Num) { + r := rand.New(rand.NewSource(seed)) + for idx := range out { + out[idx] = randFloat16(r) + } +} + // FillRandomByteArray populates out with random ByteArray values with lengths between 2 and 12 // using heap as the actual memory storage used for the bytes generated. Each element of // out will be some slice of the bytes in heap, and as such heap must outlive the byte array slices. @@ -455,6 +477,8 @@ func InitValues(values interface{}, heap *memory.Buffer) { FillRandomFloat32(0, arr) case []float64: FillRandomFloat64(0, arr) + case []float16.Num: + FillRandomFloat16(0, arr) case []parquet.Int96: FillRandomInt96(0, arr) case []parquet.ByteArray: diff --git a/go/parquet/internal/testutils/random_arrow.go b/go/parquet/internal/testutils/random_arrow.go index 360b8e7476430..3585c6f71d394 100644 --- a/go/parquet/internal/testutils/random_arrow.go +++ b/go/parquet/internal/testutils/random_arrow.go @@ -19,6 +19,7 @@ package testutils import ( "github.com/apache/arrow/go/v14/arrow" "github.com/apache/arrow/go/v14/arrow/array" + "github.com/apache/arrow/go/v14/arrow/float16" "github.com/apache/arrow/go/v14/arrow/memory" "golang.org/x/exp/rand" ) @@ -49,6 +50,13 @@ func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Arra FillRandomFloat64(0, values) bldr.AppendValues(values, nil) return bldr.NewArray() + case arrow.FLOAT16: + bldr := array.NewFloat16Builder(mem) + defer bldr.Release() + values := make([]float16.Num, size) + FillRandomFloat16(0, values) + bldr.AppendValues(values, nil) + return bldr.NewArray() case arrow.INT64: bldr := array.NewInt64Builder(mem) defer bldr.Release() @@ -212,6 +220,21 @@ func 
RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { values := make([]float64, size) FillRandomFloat64(0, values) + valid := make([]bool, size) + for idx := range valid { + valid[idx] = true + } + for i := 0; i < numNulls; i++ { + valid[i*2] = false + } + bldr.AppendValues(values, valid) + return bldr.NewArray() + case arrow.FLOAT16: + bldr := array.NewFloat16Builder(memory.DefaultAllocator) + defer bldr.Release() + values := make([]float16.Num, size) + FillRandomFloat16(0, values) + valid := make([]bool, size) for idx := range valid { valid[idx] = true