diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 0d439e0119ba9..a6e04e54259c9 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -1953,7 +1953,9 @@ TEST_F(TestLevels, ListErrors) { { ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make( "child_list", Repetition::REPEATED, - {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)}, + {GroupNode::Make("list", Repetition::REPEATED, + {PrimitiveNode::Make("element", Repetition::REQUIRED, + ParquetType::BOOLEAN)})}, LogicalType::List())); ASSERT_RAISES(Invalid, error); std::string expected("LIST-annotated groups must not be repeated."); diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 41e2350b08d4a..0ee595508fec4 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -633,11 +633,9 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, if (group.field_count() != 1) { return Status::Invalid("LIST-annotated groups must have a single child."); } - - // The Parquet spec requires that LIST-annotated group cannot be repeated when - // it applies normal three-level encoding. We need to figure out legacy list - // structures and do not enforce this rule for them. - bool is_legacy_list_structure = true; + if (group.is_repeated()) { + return Status::Invalid("LIST-annotated groups must not be repeated."); + } current_levels.Increment(group); @@ -677,8 +675,13 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, if (!list_group.logical_type()->is_list()) { return Status::Invalid("Group with one repeated child must be LIST-annotated."); } + // LIST-annotated group with three-level encoding cannot be repeated. + if (repeated_field->is_group() && + !static_cast(*repeated_field).field(0)->is_repeated()) { + return Status::Invalid("LIST-annotated groups must not be repeated."); + } RETURN_NOT_OK( - ListToSchemaField(list_group, current_levels, ctx, out, child_field)); + NodeToSchemaField(*repeated_field, current_levels, ctx, out, child_field)); } else if (HasListElementName(list_group, group)) { // We distinguish the special case that we have // @@ -702,7 +705,6 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, // } // // yields list ?nullable - is_legacy_list_structure = false; RETURN_NOT_OK( NodeToSchemaField(*repeated_field, current_levels, ctx, out, child_field)); } @@ -728,11 +730,6 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, RETURN_NOT_OK( PopulateLeaf(column_index, item_field, current_levels, ctx, out, child_field)); } - - if (!is_legacy_list_structure && group.is_repeated()) { - return Status::Invalid("LIST-annotated groups must not be repeated."); - } - out->field = ::arrow::field(group.name(), ::arrow::list(child_field->field), group.is_optional(), FieldIdMetadata(group.field_id())); out->level_info = current_levels;