Skip to content

Commit

Permalink
Add pqarrow schema conversions
Browse files Browse the repository at this point in the history
  • Loading branch information
benibus committed Sep 14, 2023
1 parent 625dc83 commit 5e8e392
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
6 changes: 6 additions & 0 deletions go/parquet/pqarrow/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,10 @@ func fieldToNode(name string, field arrow.Field, props *parquet.WriterProperties
} else {
logicalType = schema.NewTimeLogicalType(true, schema.TimeUnitMicros)
}
case arrow.FLOAT16:
typ = parquet.Types.FixedLenByteArray
length = arrow.Float16SizeBytes
logicalType = schema.Float16LogicalType{}
case arrow.STRUCT:
return structToNode(field.Type.(*arrow.StructType), field.Name, field.Nullable, props, arrprops)
case arrow.FIXED_SIZE_LIST, arrow.LIST:
Expand Down Expand Up @@ -597,6 +601,8 @@ func arrowFromFLBA(logical schema.LogicalType, length int) (arrow.DataType, erro
return arrowDecimal(logtype), nil
case schema.NoLogicalType, schema.IntervalLogicalType, schema.UUIDLogicalType:
return &arrow.FixedSizeBinaryType{ByteWidth: int(length)}, nil
case schema.Float16LogicalType:
return &arrow.Float16Type{}, nil
default:
return nil, xerrors.New("unhandled logical type " + logical.String() + " for fixed-length byte array")
}
Expand Down
20 changes: 19 additions & 1 deletion go/parquet/pqarrow/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,25 @@ func TestConvertArrowDecimals(t *testing.T) {
}
}

func TestConvertArrowFloat16(t *testing.T) {
parquetFields := make(schema.FieldList, 0)
arrowFields := make([]arrow.Field, 0)

parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("float16", parquet.Repetitions.Required,
schema.Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, -1)))
arrowFields = append(arrowFields, arrow.Field{Name: "float16", Type: &arrow.Float16Type{}})

arrowSchema := arrow.NewSchema(arrowFields, nil)
parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))

result, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true)))
assert.NoError(t, err)
assert.True(t, parquetSchema.Equals(result))
for i := 0; i < parquetSchema.NumColumns(); i++ {
assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
}
}

func TestCoerceTImestampV1(t *testing.T) {
parquetFields := make(schema.FieldList, 0)
arrowFields := make([]arrow.Field, 0)
Expand Down Expand Up @@ -418,7 +437,6 @@ func TestUnsupportedTypes(t *testing.T) {
typ arrow.DataType
}{
// Non-exhaustive list of unsupported types
{typ: &arrow.Float16Type{}},
{typ: &arrow.DurationType{}},
{typ: &arrow.DayTimeIntervalType{}},
{typ: &arrow.MonthIntervalType{}},
Expand Down

0 comments on commit 5e8e392

Please sign in to comment.