From 317a1d2f7eff6c02de79804e8917b937e1202dc2 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 21 Feb 2025 22:07:19 +0800 Subject: [PATCH] Fix missing bucket in terms aggregation with missing value Signed-off-by: kkewwei Signed-off-by: kkewwei --- CHANGELOG.md | 1 + .../aggregations/support/MissingValues.java | 2 +- .../bucket/terms/TermsAggregatorTests.java | 94 +++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ad53194361fa..7a5d2f6fe3a04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add highlighting for wildcard search on `match_only_text` field ([#17101](https://github.com/opensearch-project/OpenSearch/pull/17101)) - Fix illegal argument exception when creating a PIT ([#16781](https://github.com/opensearch-project/OpenSearch/pull/16781)) - Fix HTTP API calls that hang with 'Accept-Encoding: zstd' ([#17408](https://github.com/opensearch-project/OpenSearch/pull/17408)) +- Fix missing bucket in terms aggregation with missing value ([#17418](https://github.com/opensearch-project/OpenSearch/pull/17418)) ### Security diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java b/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java index a5c685a0930e2..429a543281c76 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java @@ -359,7 +359,7 @@ public long getValueCount() { @Override public int docValueCount() { - return values.docValueCount(); + return Math.max(1, values.docValueCount()); } @Override diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index bc22d5f6ef2e8..e59b28d0a51ff 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.NumericDocValuesField; @@ -42,6 +43,8 @@ import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.NoMergePolicy; @@ -75,6 +78,8 @@ import org.opensearch.index.mapper.RangeFieldMapper; import org.opensearch.index.mapper.RangeType; import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.mapper.TextFieldMapper; +import org.opensearch.index.mapper.TextParams; import org.opensearch.index.mapper.Uid; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.QueryBuilders; @@ -1578,6 +1583,95 @@ public void testOrderByPipelineAggregation() throws Exception { } } + public void testBucketInTermsAggregationWithMissingValue() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + // test text + { + FieldType type = TextParams.buildFieldType(() -> true, () -> false, () -> "positions", () -> false, () -> "no"); + Document document = new Document(); + document.add(new Field("mv_field", "name1", type)); + document.add(new Field("mv_field", "name2", type)); + indexWriter.addDocument(document); + document = new Document(); + document.add(new Field("mv_field1", "value1", type)); + indexWriter.addDocument(document); + document = new Document(); + document.add(new Field("mv_field1", "value2", type)); + indexWriter.addDocument(document); + indexWriter.flush(); + try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { + IndexSearcher indexSearcher = newIndexSearcher(indexReader); + TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType("mv_field"); + fieldType.setFielddata(true); + + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("nick").userValueTypeHint(ValueType.STRING) + .field("mv_field") + .missing("no_nickname"); + TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + Terms result = reduce(aggregator); + assertEquals(3, result.getBuckets().size()); + assertEquals("no_nickname", result.getBuckets().get(0).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(0).getDocCount()); + assertEquals("name1", result.getBuckets().get(1).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(1).getDocCount()); + assertEquals("name2", result.getBuckets().get(2).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(2).getDocCount()); + + } + indexWriter.deleteAll(); + } + + // test keyword + { + FieldType fieldtype = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + fieldtype.setDocValuesType(DocValuesType.SORTED_SET); + fieldtype.setIndexOptions(IndexOptions.NONE); + fieldtype.setStored(true); + + Document document = new Document(); + document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name1"))); + document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name2"))); + indexWriter.addDocument(document); + document = new Document(); + document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value1"))); + indexWriter.addDocument(document); + document = new Document(); + document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value2"))); + indexWriter.addDocument(document); + indexWriter.flush(); + try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { + IndexSearcher indexSearcher = newIndexSearcher(indexReader); + KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("mv_field1"); + + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint( + ValueType.STRING + ).field("mv_field1").missing("no_nickname1"); + TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + Terms result = reduce(aggregator); + assertEquals(3, result.getBuckets().size()); + assertEquals("no_nickname1", result.getBuckets().get(0).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(0).getDocCount()); + assertEquals("name1", result.getBuckets().get(1).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(1).getDocCount()); + assertEquals("name2", result.getBuckets().get(2).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(2).getDocCount()); + } + } + } + } + } + private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID(); private List generateDocsWithNested(String id, int value, int[] nestedValues) {