From f1475b1e76de10b59902283a33592e4956ce4a2f Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Thu, 25 Apr 2024 08:09:08 -0700 Subject: [PATCH 1/8] Increase wait time SearchIdleIT#testSearchIdleStats (#107881) --- .../java/org/elasticsearch/index/shard/SearchIdleIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/shard/SearchIdleIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/shard/SearchIdleIT.java index 199a397f52ad2..d9100e1e631db 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/shard/SearchIdleIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/shard/SearchIdleIT.java @@ -224,7 +224,7 @@ public void testSearchIdleStats() throws InterruptedException { .get(); waitUntil( () -> Arrays.stream(indicesAdmin().prepareStats(indexName).get().getShards()).allMatch(ShardStats::isSearchIdle), - searchIdleAfter, + searchIdleAfter + 1, TimeUnit.SECONDS ); From c03dfb1ddd3bd4409e23f08d44b473dcb7c44c08 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 25 Apr 2024 16:18:51 +0100 Subject: [PATCH 2/8] finish muting (#107906) Follow on from #107898 which left some tests unmuted --- .../elasticsearch/xpack/application/CohereServiceUpgradeIT.java | 1 + .../xpack/application/HuggingFaceServiceUpgradeIT.java | 1 + .../elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java | 1 + 3 files changed, 3 insertions(+) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/CohereServiceUpgradeIT.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/CohereServiceUpgradeIT.java index 73676aa730883..c73827dba2cbb 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/CohereServiceUpgradeIT.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/CohereServiceUpgradeIT.java @@ -169,6 +169,7 @@ void assertEmbeddingInference(String inferenceId, CohereEmbeddingType type) thro } @SuppressWarnings("unchecked") + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107887") public void testRerank() throws IOException { var rerankSupported = getOldClusterTestVersion().onOrAfter(COHERE_RERANK_ADDED); assumeTrue("Cohere rerank service added in " + COHERE_RERANK_ADDED, rerankSupported); diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/HuggingFaceServiceUpgradeIT.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/HuggingFaceServiceUpgradeIT.java index 7e78b83223acf..718678f97f37f 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/HuggingFaceServiceUpgradeIT.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/HuggingFaceServiceUpgradeIT.java @@ -100,6 +100,7 @@ void assertEmbeddingInference(String inferenceId) throws IOException { } @SuppressWarnings("unchecked") + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107887") public void testElser() throws IOException { var supported = getOldClusterTestVersion().onOrAfter(HF_ELSER_ADDED); assumeTrue("HF elser service added in " + HF_ELSER_ADDED, supported); diff --git 
a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java index ac0a71ebb2c82..4e8e1c845b070 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java @@ -111,6 +111,7 @@ void assertEmbeddingInference(String inferenceId) throws IOException { } @SuppressWarnings("unchecked") + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107887") public void testOpenAiCompletions() throws IOException { var openAiEmbeddingsSupported = getOldClusterTestVersion().onOrAfter(OPEN_AI_COMPLETIONS_ADDED); assumeTrue("OpenAI completions service added in " + OPEN_AI_COMPLETIONS_ADDED, openAiEmbeddingsSupported); From 4e7a833418f73c830ac583c67bdb2674ca5b7e22 Mon Sep 17 00:00:00 2001 From: Mark Vieira Date: Thu, 25 Apr 2024 08:42:39 -0700 Subject: [PATCH 3/8] Reduce inference rolling upgrade test parallelism --- x-pack/plugin/inference/qa/rolling-upgrade/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/build.gradle b/x-pack/plugin/inference/qa/rolling-upgrade/build.gradle index 328444dacaf53..381f46cdf22bd 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/build.gradle +++ b/x-pack/plugin/inference/qa/rolling-upgrade/build.gradle @@ -28,6 +28,7 @@ BuildParams.bwcVersions.withWireCompatible(v -> v.after("8.11.0")) { bwcVersion, tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { usesBwcDistribution(bwcVersion) systemProperty("tests.old_cluster_version", bwcVersion) + maxParallelForks = 1 } } From a21242054b87e456f6e301dfa017e612b83ebbd6 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 25 Apr 2024 18:38:12 +0200 Subject: [PATCH 4/8] ESQL: Document BUCKET as a grouping function (#107864) This adds the documentation for BUCKET as a grouping function and the addition of the "direct" invocation mode providing a span (in addition to the auto mode). 
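To make the two modes concrete before the diffs below, here is a rough illustrative sketch in Python (not the actual implementation; `pick_nice_span` is a hypothetical stand-in for the span-selection heuristic, which in reality rounds to calendar-friendly units such as weeks and months):

[source,python]
----
import math

def bucket_with_span(value, span):
    # Direct mode, BUCKET(field, span): floor the value down to a multiple of span.
    return math.floor(value / span) * span

def pick_nice_span(target_buckets, lo, hi):
    # Hypothetical stand-in for the auto-mode heuristic: round the raw span up
    # to a "nice" value so that at most target_buckets buckets are produced.
    raw = (hi - lo) / target_buckets
    nice = 1.0
    while nice < raw:
        nice *= 10.0
    return nice

def bucket_auto(value, target_buckets, lo, hi):
    # Auto mode, BUCKET(field, target_buckets, lo, hi): derive a span, then bucket.
    return bucket_with_span(value, pick_nice_span(target_buckets, lo, hi))

# BUCKET(salary, 5000.): a salary of 25324 lands in the 25000.0 bucket,
# matching the docsBucketNumericWithSpan example added below.
assert bucket_with_span(25324, 5000.0) == 25000.0
----
// NOTCONSOLE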
--- .../esql/esql-functions-operators.asciidoc | 7 ++ docs/reference/esql/esql-get-started.asciidoc | 7 -- .../functions/aggregation-functions.asciidoc | 2 +- .../functions/date-time-functions.asciidoc | 2 - .../esql/functions/examples/bucket.asciidoc | 48 +++++++++- .../functions/grouping-functions.asciidoc | 14 +++ .../functions/kibana/definition/bucket.json | 2 + .../src/main/resources/bucket.csv-spec | 90 ++++++++++++------- .../expression/function/grouping/Bucket.java | 18 +++- 9 files changed, 145 insertions(+), 45 deletions(-) create mode 100644 docs/reference/esql/functions/grouping-functions.asciidoc diff --git a/docs/reference/esql/esql-functions-operators.asciidoc b/docs/reference/esql/esql-functions-operators.asciidoc index ddc077f3b8ff8..3ad61a8d56455 100644 --- a/docs/reference/esql/esql-functions-operators.asciidoc +++ b/docs/reference/esql/esql-functions-operators.asciidoc @@ -16,6 +16,12 @@ The reference documentation is divided into the following categories: include::functions/aggregation-functions.asciidoc[tag=agg_list] ==== +.*Grouping functions* +[%collapsible] +==== +include::functions/grouping-functions.asciidoc[tag=group_list] +==== + .*Math functions* [%collapsible] ==== @@ -68,6 +74,7 @@ include::functions/operators.asciidoc[tag=op_list] ==== include::functions/aggregation-functions.asciidoc[] +include::functions/grouping-functions.asciidoc[] include::functions/math-functions.asciidoc[] include::functions/string-functions.asciidoc[] include::functions/date-time-functions.asciidoc[] diff --git a/docs/reference/esql/esql-get-started.asciidoc b/docs/reference/esql/esql-get-started.asciidoc index 0e23c0d97e61b..663b2f8ecd249 100644 --- a/docs/reference/esql/esql-get-started.asciidoc +++ b/docs/reference/esql/esql-get-started.asciidoc @@ -244,13 +244,6 @@ To track statistics over time, {esql} enables you to create histograms using the and returns a value for each row that corresponds to the resulting bucket the row falls into. -For example, to create hourly buckets for the data on October 23rd: - -[source,esql] ----- -include::{esql-specs}/bucket.csv-spec[tag=gs-bucket] ----- - Combine `BUCKET` with <> to create a histogram. 
For example, to count the number of events per hour: diff --git a/docs/reference/esql/functions/aggregation-functions.asciidoc b/docs/reference/esql/functions/aggregation-functions.asciidoc index 2fdc8582d6bfb..074fcce9ad43d 100644 --- a/docs/reference/esql/functions/aggregation-functions.asciidoc +++ b/docs/reference/esql/functions/aggregation-functions.asciidoc @@ -5,7 +5,7 @@ Aggregate functions ++++ -The <> function supports these aggregate functions: +The <> command supports these aggregate functions: // tag::agg_list[] * <> diff --git a/docs/reference/esql/functions/date-time-functions.asciidoc b/docs/reference/esql/functions/date-time-functions.asciidoc index 75aeea40bc608..8ce26eaabe381 100644 --- a/docs/reference/esql/functions/date-time-functions.asciidoc +++ b/docs/reference/esql/functions/date-time-functions.asciidoc @@ -8,7 +8,6 @@ {esql} supports these date-time functions: // tag::date_list[] -* <> * <> * <> * <> @@ -17,7 +16,6 @@ * <> // end::date_list[] -include::layout/bucket.asciidoc[] include::layout/date_diff.asciidoc[] include::layout/date_extract.asciidoc[] include::layout/date_format.asciidoc[] diff --git a/docs/reference/esql/functions/examples/bucket.asciidoc b/docs/reference/esql/functions/examples/bucket.asciidoc index 0854840ffda34..f66f737b7d4b5 100644 --- a/docs/reference/esql/functions/examples/bucket.asciidoc +++ b/docs/reference/esql/functions/examples/bucket.asciidoc @@ -2,6 +2,10 @@ *Examples* +`BUCKET` can work in two modes: one in which the bucket size is computed +automatically from a recommended bucket count and a range (four parameters), and +another in which the bucket size is provided directly (two parameters). + Using a target number of buckets, a start of a range, and an end of a range, `BUCKET` picks an appropriate bucket size to generate the target number of buckets or fewer. For example, asking for at most 20 buckets over a year results in monthly buckets: [source.merge.styled,esql] ---- include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonth] ---- [%header.monospaced.styled,format=dsv,separator=|] |=== include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonth-result] |=== The goal isn't to provide *exactly* the target number of buckets, it's to pick a range that people are comfortable with that provides at most the target number of buckets. -Combine `BUCKET` with <> to create a histogram: +Combine `BUCKET` with an <> to create a histogram: [source.merge.styled,esql] ---- include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram] ---- [%header.monospaced.styled,format=dsv,separator=|] |=== include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram-result] |=== NOTE: `BUCKET` does not create buckets that don't match any documents. -+ "That's why this example is missing `1985-03-01` and other dates. +That's why this example is missing `1985-03-01` and other dates. Asking for more buckets can result in a smaller range. For example, asking for at most 100 buckets in a year results in weekly buckets: [source.merge.styled,esql] ---- include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogram] ---- [%header.monospaced.styled,format=dsv,separator=|] |=== include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogram-result] |=== NOTE: `BUCKET` does not filter any rows. It only uses the provided range to pick a good bucket size. For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range. Combine `BUCKET` with <> to filter rows.
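That note can be made concrete with a short illustrative sketch (plain Python, not the actual implementation): bucketing is a pure flooring operation, so values outside the provided range still map to buckets, just to buckets outside the range:

[source,python]
----
import math

# Illustrative only: BUCKET never drops a row. Assuming a span of 5 was picked
# from a range of [0, 100), out-of-range values still receive bucket keys.
def numeric_bucket(value, span):
    return math.floor(value / span) * span

assert numeric_bucket(-7, 5) == -10    # below the range start, still bucketed
assert numeric_bucket(103, 5) == 100   # beyond the range end, still bucketed
----
// NOTCONSOLE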
+If the desired bucket size is known in advance, simply provide it as the second +argument, leaving the range out: +[source.merge.styled,esql] +---- +include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan-result] +|=== + +NOTE: When providing the bucket size as the second parameter, it must be a time +duration or date period. + `BUCKET` can also operate on numeric fields. For example, to create a salary histogram: [source.merge.styled,esql] ---- @@ -58,6 +76,20 @@ include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumeric-result] Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range. You have to find the `min` and `max` separately. {esql} doesn't yet have an easy way to do that automatically. +The range can be omitted if the desired bucket size is known in advance. Simply +provide it as the second argument: +[source.merge.styled,esql] +---- +include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumericWithSpan] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumericWithSpan-result] +|=== + +NOTE: When providing the bucket size as the second parameter, it must be +of a floating point type. + Create hourly buckets for the last 24 hours, and calculate the number of events per hour: [source.merge.styled,esql] ---- @@ -77,3 +109,15 @@ include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg] include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg-result] |=== +`BUCKET` may be used in both the aggregating and grouping part of the +<> command provided that in the aggregating +part the function is referenced by an alias defined in the +grouping part, or that it is invoked with the exact same expression: +[source.merge.styled,esql] +---- +include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression-result] +|=== diff --git a/docs/reference/esql/functions/grouping-functions.asciidoc b/docs/reference/esql/functions/grouping-functions.asciidoc new file mode 100644 index 0000000000000..ed0caf5ec2a4c --- /dev/null +++ b/docs/reference/esql/functions/grouping-functions.asciidoc @@ -0,0 +1,14 @@ +[[esql-group-functions]] +==== {esql} grouping functions + +++++ +Grouping functions +++++ + +The <> command supports these grouping functions: + +// tag::group_list[] +* <> +// end::group_list[] + +include::layout/bucket.asciidoc[] diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json index bab6cef538b07..986c0e8f91d33 100644 --- a/docs/reference/esql/functions/kibana/definition/bucket.json +++ b/docs/reference/esql/functions/kibana/definition/bucket.json @@ -939,7 +939,9 @@ "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT hire_date", "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT month", "FROM employees\n| WHERE 
hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT week", + "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week)\n| SORT week", "FROM employees\n| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)\n| SORT bs", + "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)\n| SORT b", "FROM sample_data \n| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()\n| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())", "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket" ] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec index ad8f16f89cf7e..f41bf3f020eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec @@ -175,13 +175,13 @@ FROM employees ; //tag::docsBucketMonthlyHistogram-result[] - hires_per_month:long | month:date -2 |1985-02-01T00:00:00.000Z -1 |1985-05-01T00:00:00.000Z -1 |1985-07-01T00:00:00.000Z -1 |1985-09-01T00:00:00.000Z -2 |1985-10-01T00:00:00.000Z -4 |1985-11-01T00:00:00.000Z + hires_per_month:long | month:date +2 |1985-02-01T00:00:00.000Z +1 |1985-05-01T00:00:00.000Z +1 |1985-07-01T00:00:00.000Z +1 |1985-09-01T00:00:00.000Z +2 |1985-10-01T00:00:00.000Z +4 |1985-11-01T00:00:00.000Z //end::docsBucketMonthlyHistogram-result[] ; @@ -196,15 +196,36 @@ FROM employees //tag::docsBucketWeeklyHistogram-result[] hires_per_week:long | week:date -2 |1985-02-18T00:00:00.000Z -1 |1985-05-13T00:00:00.000Z -1 |1985-07-08T00:00:00.000Z -1 |1985-09-16T00:00:00.000Z -2 |1985-10-14T00:00:00.000Z -4 |1985-11-18T00:00:00.000Z +2 |1985-02-18T00:00:00.000Z +1 |1985-05-13T00:00:00.000Z +1 |1985-07-08T00:00:00.000Z +1 |1985-09-16T00:00:00.000Z +2 |1985-10-14T00:00:00.000Z +4 |1985-11-18T00:00:00.000Z //end::docsBucketWeeklyHistogram-result[] ; +// bucketing in span mode (identical results to above) +docsBucketWeeklyHistogramWithSpan#[skip:-8.13.99, reason:BUCKET renamed in 8.14] +//tag::docsBucketWeeklyHistogramWithSpan[] +FROM employees +| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" +| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week) +| SORT week +//end::docsBucketWeeklyHistogramWithSpan[] +; + +//tag::docsBucketWeeklyHistogramWithSpan-result[] + hires_per_week:long | week:date +2 |1985-02-18T00:00:00.000Z +1 |1985-05-13T00:00:00.000Z +1 |1985-07-08T00:00:00.000Z +1 |1985-09-16T00:00:00.000Z +2 |1985-10-14T00:00:00.000Z +4 |1985-11-18T00:00:00.000Z +//end::docsBucketWeeklyHistogramWithSpan-result[] +; + docsBucketLast24hr#[skip:-8.13.99, reason:BUCKET renamed in 8.14] //tag::docsBucketLast24hr[] FROM sample_data @@ -218,17 +239,6 @@ FROM sample_data //end::docsBucketLast24hr-result[] ; -docsGettingStartedBucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14] -// tag::gs-bucket[] -FROM sample_data -| STATS BY bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW()) -// 
end::gs-bucket[] -| LIMIT 0 -; - -bucket:date -; - docsGettingStartedBucketStatsBy#[skip:-8.13.99, reason:BUCKET renamed in 8.14] // tag::gs-bucket-stats-by[] FROM sample_data @@ -352,12 +362,15 @@ FROM employees // bucketing in span mode (identical results to above) bucketNumericWithSpan#[skip:-8.13.99, reason:BUCKET extended in 8.14] +//tag::docsBucketNumericWithSpan[] FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS c = COUNT(1) BY b = BUCKET(salary, 5000.) | SORT b +//end::docsBucketNumericWithSpan[] ; +//tag::docsBucketNumericWithSpan-result[] c:long | b:double 1 |25000.0 1 |30000.0 1 |40000.0 2 |45000.0 2 |50000.0 1 |55000.0 1 |60000.0 1 |65000.0 1 |70000.0 +//end::docsBucketNumericWithSpan-result[] ; bucketNumericMixedTypes#[skip:-8.13.99, reason:BUCKET extended in 8.14] @@ -439,14 +453,28 @@ FROM employees ; reuseGroupingFunctionWithExpression#[skip:-8.13.99, reason:BUCKET renamed in 8.14] +//tag::reuseGroupingFunctionWithExpression[] FROM employees -| STATS sum = BUCKET(salary % 2 + 13, 1.) + 1 BY bucket = BUCKET(salary % 2 + 13, 1.) -| SORT sum -; - - sum:double | bucket:double -14.0 |13.0 -15.0 |14.0 +| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.) +| SORT b1, b2 +| KEEP s1, b1, s2, b2 +//end::reuseGroupingFunctionWithExpression[] +; + +//tag::reuseGroupingFunctionWithExpression-result[] + s1:double | b1:double | s2:double | b2:double +351.0 |350.0 |1002.0 |1000.0 +401.0 |400.0 |1002.0 |1000.0 +451.0 |450.0 |1002.0 |1000.0 +501.0 |500.0 |1002.0 |1000.0 +551.0 |550.0 |1002.0 |1000.0 +601.0 |600.0 |1002.0 |1000.0 +601.0 |600.0 |1052.0 |1050.0 +651.0 |650.0 |1052.0 |1050.0 +701.0 |700.0 |1052.0 |1050.0 +751.0 |750.0 |1052.0 |1050.0 +801.0 |800.0 |1052.0 |1050.0 +//end::reuseGroupingFunctionWithExpression-result[] ; reuseGroupingFunctionWithinAggs#[skip:-8.13.99, reason:BUCKET renamed in 8.14] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index f83c10e4ce1f6..218d469d626f9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -92,6 +92,10 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional examples = { @Example( description = """ + `BUCKET` can work in two modes: one in which the bucket size is computed + automatically from a recommended bucket count and a range (four parameters), and + another in which the bucket size is provided directly (two parameters). + Using a target number of buckets, a start of a range, and an end of a range, `BUCKET` picks an appropriate bucket size to generate the target number of buckets or fewer.
For example, asking for at most 20 buckets over a year results in monthly buckets:""", @@ -102,12 +106,12 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional it's to pick a range that people are comfortable with that provides at most the target number of buckets.""" ), @Example( - description = "Combine `BUCKET` with <> to create a histogram:", + description = "Combine `BUCKET` with an <> to create a histogram:", file = "bucket", tag = "docsBucketMonthlyHistogram", explanation = """ NOTE: `BUCKET` does not create buckets that don't match any documents. - + "That's why this example is missing `1985-03-01` and other dates.""" + That's why this example is missing `1985-03-01` and other dates.""" ), @Example( description = """ @@ -120,6 +124,11 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range. Combine `BUCKET` with <> to filter rows.""" ), + @Example(description = """ + If the desired bucket size is known in advance, simply provide it as the second + argument, leaving the range out:""", file = "bucket", tag = "docsBucketWeeklyHistogramWithSpan", explanation = """ + NOTE: When providing the bucket size as the second parameter, it must be a time + duration or date period."""), @Example( description = "`BUCKET` can also operate on numeric fields. For example, to create a salary histogram:", file = "bucket", tag = "docsBucketNumeric", explanation = """ Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range. You have to find the `min` and `max` separately.
{esql} doesn't yet have an easy way to do that automatically.""" ), + @Example(description = """ + If the desired bucket size is known in advance, simply provide it as the second + argument, leaving the range out:""", file = "bucket", tag = "docsBucketNumericWithSpan", explanation = """ + NOTE: When providing the bucket size as the second parameter, it must be + of a floating point type."""), @Example( description = "Create hourly buckets for the last 24 hours, and calculate the number of events per hour:", file = "bucket", From cfc9e44b63d3a24b7f7b49579547ffe0951f624e Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Thu, 25 Apr 2024 17:53:13 +0100 Subject: [PATCH 5/8] Reduce parallelism for yaml and java esql rest tests (#107890) This fixes #107879 by reducing parallelism for the esql java rest tests as well. --- x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle b/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle index c25ef858534e0..5515ef0728a72 100644 --- a/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle +++ b/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle @@ -40,6 +40,7 @@ BuildParams.bwcVersions.withWireCompatible(supportedVersion) { bwcVersion, baseN usesBwcDistribution(bwcVersion) systemProperty("tests.old_cluster_version", bwcVersion) systemProperty("tests.version_parameter_unsupported", versionUnsupported(bwcVersion)) + maxParallelForks = 1 } def yamlRestTest = tasks.register("v${bwcVersion}#yamlRestTest", StandaloneRestIntegTestTask) { From fdefe090418aed09243fb150548909e3e0c4ff0e Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 25 Apr 2024 20:11:44 +0300 Subject: [PATCH 6/8] Fix for from parameter when using sub_searches and rank (#106253) --- docs/changelog/106253.yaml | 6 + docs/reference/search/retriever.asciidoc | 4 +- docs/reference/search/rrf.asciidoc | 86 +- .../action/search/SearchPhaseController.java | 4 + .../action/search/SearchRequest.java | 6 +- .../search/query/QueryPhase.java | 2 +- .../search/rank/RankBuilder.java | 22 +- .../QueryPhaseRankCoordinatorContext.java | 8 +- .../context/QueryPhaseRankShardContext.java | 14 +- .../action/search/SearchRequestTests.java | 15 +- .../search/rank/TestRankBuilder.java | 2 +- .../RRFQueryPhaseRankCoordinatorContext.java | 12 +- .../rrf/RRFQueryPhaseRankShardContext.java | 13 +- .../xpack/rank/rrf/RRFRankBuilder.java | 11 +- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 14 +- .../xpack/rank/rrf/RRFRankBuilderTests.java | 6 +- .../xpack/rank/rrf/RRFRankContextTests.java | 4 +- .../rrf/RRFRetrieverBuilderParsingTests.java | 2 +- .../rest-api-spec/test/rrf/100_rank_rrf.yml | 44 +- .../test/rrf/150_rank_rrf_pagination.yml | 1055 +++++++++++++++++ .../test/rrf/200_rank_rrf_script.yml | 6 +- .../test/rrf/300_rrf_retriever.yml | 8 +- .../test/rrf/400_rrf_retriever_script.yml | 6 +- 23 files changed, 1271 insertions(+), 79 deletions(-) create mode 100644 docs/changelog/106253.yaml create mode 100644 x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/150_rank_rrf_pagination.yml diff --git a/docs/changelog/106253.yaml b/docs/changelog/106253.yaml new file mode 100644 index 0000000000000..b80cda37f63c7 --- /dev/null +++ b/docs/changelog/106253.yaml @@ -0,0 +1,6 @@ +pr: 106253 +summary: Fix for from parameter when using `sub_searches` and rank +area: Ranking +type: bug +issues: + - 99011 diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc
index 42d2129f0fdec..6301f439e9b5b 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -189,7 +189,7 @@ GET /index/_search } ], "rank_constant": ... - "window_size": ... + "rank_window_size": ... } } } @@ -202,7 +202,7 @@ The <> and <> parameters are provided globally as part of the general <>. They are applied to all retrievers in a retriever tree unless a specific retriever overrides the `size` parameter -using a different parameter such as `window_size`. Though, the final +using a different parameter such as `rank_window_size`. Though, the final search hits are always limited to `size`. ==== Using aggregations with a retriever tree diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 96477cdee45f1..c541750fff789 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -74,7 +74,7 @@ GET example-index/_search } } ], - "window_size": 50, + "rank_window_size": 50, "rank_constant": 20 } } @@ -94,8 +94,8 @@ its global top 50 results. the query top documents and rank them based on the RRF formula using parameters from the `rrf` retriever to get the combined top documents using the default `size` of `10`. -Note that if `k` from a knn search is larger than `window_size`, the results are -truncated to `window_size`. If `k` is smaller than `window_size`, the results are +Note that if `k` from a knn search is larger than `rank_window_size`, the results are +truncated to `rank_window_size`. If `k` is smaller than `rank_window_size`, the results are `k` size. [[rrf-supported-features]] @@ -160,7 +160,7 @@ GET example-index/_search } } ], - "window_size": 50, + "rank_window_size": 50, "rank_constant": 20 } } @@ -289,7 +289,7 @@ GET example-index/_search } } ], - "window_size": 5, + "rank_window_size": 5, "rank_constant": 1 } }, @@ -510,8 +510,82 @@ _id: 5 = 1.0/(1+4) = 0.2000 ---- // NOTCONSOLE -We rank the documents based on the RRF formula with a `window_size` of `5` +We rank the documents based on the RRF formula with a `rank_window_size` of `5` truncating the bottom `2` docs in our RRF result set with a `size` of `3`. We end with `_id: 3` as `_rank: 1`, `_id: 2` as `_rank: 2`, and `_id: 4` as `_rank: 3`. This ranking matches the result set from the original RRF search as expected. + + +==== Pagination in RRF + +When using `rrf` you can paginate through the results using the `from` parameter. +As the final ranking is solely dependent on the original query ranks, to ensure +consistency when paginating, we have to make sure that while `from` changes, the order +of what we have already seen remains intact. To that end, we're using a fixed `rank_window_size` +as the whole available result set upon which we can paginate. +This essentially means that if: + +* `from + size` ≤ `rank_window_size` : we could get `results[from: from+size]` documents back from +the final `rrf` ranked result set + +* `from + size` > `rank_window_size` : we would get 0 results back, as the request would fall outside the +available `rank_window_size`-sized result set. + +An important thing to note here is that since `rank_window_size` is all the results that we'll get to see +from the individual query components, pagination guarantees consistency, i.e. no documents are skipped +or duplicated in multiple pages, iff `rank_window_size` remains the same. If `rank_window_size` changes, then the order +of the results might change as well, even for the same ranks. 
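Before working through the numbers, the coordinator-side behavior can be sketched in a few lines of Python (illustrative only; the `rrf_paginate` helper is hypothetical and ignores the real implementation's tie-breaking on per-query scores, shard, and doc id):

[source,python]
----
def rrf_paginate(rankings, rank_window_size, rank_constant, from_, size):
    # Merge per-query rankings with reciprocal rank fusion, then paginate
    # within the top rank_window_size results.
    scores = {}
    for ranking in rankings:
        # each query contributes only its top rank_window_size documents
        for rank, doc in enumerate(ranking[:rank_window_size], start=1):
            scores[doc] = scores.get(doc, 0.0) + 1.0 / (rank_constant + rank)
    merged = sorted(scores, key=scores.get, reverse=True)[:rank_window_size]
    return merged[from_:from_ + size]

# the simplified two-query example worked out below
query_a = ["1", "2", "3", "4"]
query_b = ["5", "4", "3", "1", "2"]
assert rrf_paginate([query_a, query_b], 5, 1, 0, 2) == ["1", "4"]
assert rrf_paginate([query_a, query_b], 5, 1, 6, 2) == []  # outside the window
----
// NOTCONSOLE

Note how each query contributes at most `rank_window_size` entries before merging; that truncation is exactly what drives the second scenario below.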
+ +To illustrate all of the above, let's consider the following simplified example where we have +two queries, `queryA` and `queryB`, and their ranked documents: +[source,python] +---- + | queryA | queryB | +_id: | 1 | 5 | +_id: | 2 | 4 | +_id: | 3 | 3 | +_id: | 4 | 1 | +_id: | | 2 | +---- +// NOTCONSOLE + +For `rank_window_size=5` we would get to see all documents from both `queryA` and `queryB`. +Assuming a `rank_constant=1`, the `rrf` scores would be: +[source,python] +---- +# doc | queryA | queryB | score +_id: 1 = 1.0/(1+1) + 1.0/(1+4) = 0.7 +_id: 2 = 1.0/(1+2) + 1.0/(1+5) = 0.5 +_id: 3 = 1.0/(1+3) + 1.0/(1+3) = 0.5 +_id: 4 = 1.0/(1+4) + 1.0/(1+2) = 0.533 +_id: 5 = 0 + 1.0/(1+1) = 0.5 +---- +// NOTCONSOLE + +So the final ranked result set would be [`1`, `4`, `2`, `3`, `5`] and we would paginate over that, since +`rank_window_size == len(results)`. In this scenario, we would have: + +* `from=0, size=2` would return documents [`1`, `4`] with ranks `[1, 2]` +* `from=2, size=2` would return documents [`2`, `3`] with ranks `[3, 4]` +* `from=4, size=2` would return document [`5`] with rank `[5]` +* `from=6, size=2` would return an empty result set as there are no more results to iterate over + +Now, if we had a `rank_window_size=2`, we would only get to see `[1, 2]` and `[5, 4]` documents +for queries `queryA` and `queryB` respectively. Working out the math, we would see that the results would now +be slightly different, because we would have no knowledge of the documents in positions `[3: end]` for either query. +[source,python] +---- +# doc | queryA | queryB | score +_id: 1 = 1.0/(1+1) + 0 = 0.5 +_id: 2 = 1.0/(1+2) + 0 = 0.33 +_id: 4 = 0 + 1.0/(1+2) = 0.33 +_id: 5 = 0 + 1.0/(1+1) = 0.5 +---- +// NOTCONSOLE + +The final ranked result set would be [`1`, `5`, `2`, `4`], and we would be able to paginate +on the top `rank_window_size` results, i.e. [`1`, `5`]. So for the same params as above, we would now have: + +* `from=0, size=2` would return [`1`, `5`] with ranks `[1, 2]` +* `from=2, size=2` would return an empty result set as it would fall outside the available `rank_window_size` results. diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index c9b08b5aed4ac..8cc3c6f003fb5 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -643,6 +643,10 @@ static ReducedQueryPhase reducedQueryPhase( ); sortedTopDocs = new SortedTopDocs(rankedDocs, false, null, null, null, 0); size = sortedTopDocs.scoreDocs.length; + // we need to reset `from` here as pagination and result trimming have already taken place + // within the `QueryPhaseRankCoordinatorContext#rankQueryPhaseResults` and we don't want + // to apply it again in the `getHits` method.
+ from = 0; } final TotalHits totalHits = topDocsStats.getTotalHits(); return new ReducedQueryPhase( diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java index a20846ab98a4d..12167c8361513 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java @@ -397,10 +397,10 @@ public ActionRequestValidationException validate() { if (size == 0) { validationException = addValidationError("[rank] requires [size] greater than [0]", validationException); } - if (size > source.rankBuilder().windowSize()) { + if (size > source.rankBuilder().rankWindowSize()) { validationException = addValidationError( - "[rank] requires [window_size: " - + source.rankBuilder().windowSize() + "[rank] requires [rank_window_size: " + + source.rankBuilder().rankWindowSize() + "]" + " be greater than or equal to [size: " + size diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java index cd8b5494ac31f..828c6d2b4f3e8 100644 --- a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -98,7 +98,7 @@ static void executeRank(SearchContext searchContext) throws QueryPhaseExecutionE RankSearchContext rankSearchContext = new RankSearchContext( searchContext, rankQuery, - queryPhaseRankShardContext.windowSize() + queryPhaseRankShardContext.rankWindowSize() ) ) { QueryPhase.addCollectorsAndSearch(rankSearchContext); diff --git a/server/src/main/java/org/elasticsearch/search/rank/RankBuilder.java b/server/src/main/java/org/elasticsearch/search/rank/RankBuilder.java index e0e04c563a9a8..7118c9f49b36d 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/RankBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/rank/RankBuilder.java @@ -30,22 +30,22 @@ */ public abstract class RankBuilder implements VersionedNamedWriteable, ToXContentObject { - public static final ParseField WINDOW_SIZE_FIELD = new ParseField("window_size"); + public static final ParseField RANK_WINDOW_SIZE_FIELD = new ParseField("rank_window_size"); public static final int DEFAULT_WINDOW_SIZE = SearchService.DEFAULT_SIZE; - private final int windowSize; + private final int rankWindowSize; - public RankBuilder(int windowSize) { - this.windowSize = windowSize; + public RankBuilder(int rankWindowSize) { + this.rankWindowSize = rankWindowSize; } public RankBuilder(StreamInput in) throws IOException { - windowSize = in.readVInt(); + rankWindowSize = in.readVInt(); } public final void writeTo(StreamOutput out) throws IOException { - out.writeVInt(windowSize); + out.writeVInt(rankWindowSize); doWriteTo(out); } @@ -55,7 +55,7 @@ public final void writeTo(StreamOutput out) throws IOException { public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.startObject(getWriteableName()); - builder.field(WINDOW_SIZE_FIELD.getPreferredName(), windowSize); + builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); doXContent(builder, params); builder.endObject(); builder.endObject(); @@ -64,8 +64,8 @@ public final XContentBuilder toXContent(XContentBuilder builder, Params params) protected abstract void doXContent(XContentBuilder builder, Params params) throws IOException; - public int 
windowSize() { - return windowSize; + public int rankWindowSize() { + return rankWindowSize; } /** @@ -88,14 +88,14 @@ public final boolean equals(Object obj) { } @SuppressWarnings("unchecked") RankBuilder other = (RankBuilder) obj; - return Objects.equals(windowSize, other.windowSize()) && doEquals(other); + return Objects.equals(rankWindowSize, other.rankWindowSize()) && doEquals(other); } protected abstract boolean doEquals(RankBuilder other); @Override public final int hashCode() { - return Objects.hash(getClass(), windowSize, doHashCode()); + return Objects.hash(getClass(), rankWindowSize, doHashCode()); } protected abstract int doHashCode(); diff --git a/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankCoordinatorContext.java b/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankCoordinatorContext.java index 181122380f22d..1be8544758a8f 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankCoordinatorContext.java +++ b/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankCoordinatorContext.java @@ -22,15 +22,15 @@ */ public abstract class QueryPhaseRankCoordinatorContext { - protected final int windowSize; + protected final int rankWindowSize; - public QueryPhaseRankCoordinatorContext(int windowSize) { - this.windowSize = windowSize; + public QueryPhaseRankCoordinatorContext(int rankWindowSize) { + this.rankWindowSize = rankWindowSize; } /** * This is used to pull information passed back from the shards as part of {@link QuerySearchResult#getRankShardResult()} - * and return a {@link ScoreDoc[]} of the `window_size` ranked results. Note that {@link TopDocsStats} is included so that + * and return a {@link ScoreDoc[]} of the `rank_window_size` ranked results. Note that {@link TopDocsStats} is included so that * appropriate stats may be updated based on rank results. * This is called when reducing query results through {@code SearchPhaseController#reducedQueryPhase()}. */ diff --git a/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankShardContext.java b/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankShardContext.java index e8bac25009e8c..f562977afb857 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankShardContext.java +++ b/server/src/main/java/org/elasticsearch/search/rank/context/QueryPhaseRankShardContext.java @@ -22,27 +22,27 @@ public abstract class QueryPhaseRankShardContext { protected final List queries; - protected final int windowSize; + protected final int rankWindowSize; - public QueryPhaseRankShardContext(List queries, int windowSize) { + public QueryPhaseRankShardContext(List queries, int rankWindowSize) { this.queries = queries; - this.windowSize = windowSize; + this.rankWindowSize = rankWindowSize; } public List queries() { return queries; } - public int windowSize() { - return windowSize; + public int rankWindowSize() { + return rankWindowSize; } /** * This is used to reduce the number of required results that are serialized - * to the coordinating node. Normally we would have to serialize {@code queries * window_size} + * to the coordinating node. Normally we would have to serialize {@code queries * rank_window_size} * results, but we can infer that there will likely be overlap of document results. 
Given that we * know any searches that match the same document must be on the same shard, we can sort on the shard - * instead for a top window_size set of results and reduce the amount of data we serialize. + * instead for a top rank_window_size set of results and reduce the amount of data we serialize. */ public abstract RankShardResult combineQueryPhaseResults(List rankResults); } diff --git a/server/src/test/java/org/elasticsearch/action/search/SearchRequestTests.java b/server/src/test/java/org/elasticsearch/action/search/SearchRequestTests.java index f7f58ef06ccdc..9fd2cd1206ee8 100644 --- a/server/src/test/java/org/elasticsearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/SearchRequestTests.java @@ -400,7 +400,7 @@ public void testValidate() throws IOException { assertNotNull(validationErrors); assertEquals(1, validationErrors.validationErrors().size()); assertEquals( - "[rank] requires [window_size: 1] be greater than or equal to [size: 2]", + "[rank] requires [rank_window_size: 1] be greater than or equal to [size: 2]", validationErrors.validationErrors().get(0) ); } @@ -437,10 +437,21 @@ public void testValidate() throws IOException { assertNotNull(validationErrors); assertEquals(1, validationErrors.validationErrors().size()); assertEquals( - "[rank] requires [window_size: 9] be greater than or equal to [size: 10]", + "[rank] requires [rank_window_size: 9] be greater than or equal to [size: 10]", validationErrors.validationErrors().get(0) ); } + { + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().rankBuilder(new TestRankBuilder(3)) + .query(QueryBuilders.termQuery("field", "term")) + .knnSearch(List.of(new KnnSearchBuilder("vector", new float[] { 0f }, 10, 100, null))) + .size(3) + .from(4) + ); + ActionRequestValidationException validationErrors = searchRequest.validate(); + assertNull(validationErrors); + } { SearchRequest searchRequest = new SearchRequest().source( new SearchSourceBuilder().rankBuilder(new TestRankBuilder(100)) diff --git a/test/framework/src/main/java/org/elasticsearch/search/rank/TestRankBuilder.java b/test/framework/src/main/java/org/elasticsearch/search/rank/TestRankBuilder.java index 691d541913716..8e2a2c96a31ab 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/rank/TestRankBuilder.java +++ b/test/framework/src/main/java/org/elasticsearch/search/rank/TestRankBuilder.java @@ -35,7 +35,7 @@ public class TestRankBuilder extends RankBuilder { ); static { - PARSER.declareInt(optionalConstructorArg(), WINDOW_SIZE_FIELD); + PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); } public static TestRankBuilder fromXContent(XContentParser parser) throws IOException { diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankCoordinatorContext.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankCoordinatorContext.java index 1b3ebe19ce494..b6a1ad52d5d15 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankCoordinatorContext.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankCoordinatorContext.java @@ -62,7 +62,7 @@ public ScoreDoc[] rankQueryPhaseResults(List querySearchResul for (int qi = 0; qi < queryCount; ++qi) { final int fqi = qi; - queues.add(new PriorityQueue<>(windowSize + from) { + queues.add(new PriorityQueue<>(rankWindowSize) { @Override protected boolean 
lessThan(RRFRankDoc a, RRFRankDoc b) { float score1 = a.scores[fqi]; @@ -105,7 +105,7 @@ protected boolean lessThan(RRFRankDoc a, RRFRankDoc b) { // score if we already saw it as part of a previous query's // doc set, otherwise we make a new doc and calculate the // initial score - Map results = Maps.newMapWithExpectedSize(queryCount * windowSize); + Map results = Maps.newMapWithExpectedSize(queryCount * rankWindowSize); final int fqc = queryCount; for (int qi = 0; qi < queryCount; ++qi) { PriorityQueue queue = queues.get(qi); @@ -127,6 +127,11 @@ protected boolean lessThan(RRFRankDoc a, RRFRankDoc b) { } } + // return if pagination requested is outside the results + if (results.values().size() - from <= 0) { + return new ScoreDoc[0]; + } + // sort the results based on rrf score, tiebreaker based on // larger individual query score from 1 to n, smaller shard then smaller doc id RRFRankDoc[] sortedResults = results.values().toArray(RRFRankDoc[]::new); @@ -151,9 +156,10 @@ protected boolean lessThan(RRFRankDoc a, RRFRankDoc b) { } return rrf1.doc < rrf2.doc ? -1 : 1; }); + // trim results to size RRFRankDoc[] topResults = new RRFRankDoc[Math.min(size, sortedResults.length - from)]; for (int rank = 0; rank < topResults.length; ++rank) { - topResults[rank] = sortedResults[rank]; + topResults[rank] = sortedResults[from + rank]; topResults[rank].rank = rank + 1 + from; } // update fetch hits for the fetch phase, so we gather any additional diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankShardContext.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankShardContext.java index 59307e62872fb..9843b14df6903 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankShardContext.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFQueryPhaseRankShardContext.java @@ -25,11 +25,9 @@ public class RRFQueryPhaseRankShardContext extends QueryPhaseRankShardContext { private final int rankConstant; - private final int from; - public RRFQueryPhaseRankShardContext(List queries, int from, int windowSize, int rankConstant) { - super(queries, windowSize); - this.from = from; + public RRFQueryPhaseRankShardContext(List queries, int rankWindowSize, int rankConstant) { + super(queries, rankWindowSize); this.rankConstant = rankConstant; } @@ -41,7 +39,7 @@ public RRFRankShardResult combineQueryPhaseResults(List rankResults) { // if a doc isn't part of a result set its position will be NO_RANK [0] and // its score is [0f] int queries = rankResults.size(); - Map docsToRankResults = Maps.newMapWithExpectedSize(windowSize); + Map docsToRankResults = Maps.newMapWithExpectedSize(rankWindowSize); int index = 0; for (TopDocs rrfRankResult : rankResults) { int rank = 1; @@ -92,8 +90,9 @@ public RRFRankShardResult combineQueryPhaseResults(List rankResults) { } return rrf1.doc < rrf2.doc ? -1 : 1; }); - // trim the results to window size - RRFRankDoc[] topResults = new RRFRankDoc[Math.min(windowSize + from, sortedResults.length)]; + // trim the results if needed, otherwise each shard will always return `rank_window_size` results. 
+ // pagination and all else will happen on the coordinator when combining the shard responses + RRFRankDoc[] topResults = new RRFRankDoc[Math.min(rankWindowSize, sortedResults.length)]; for (int rank = 0; rank < topResults.length; ++rank) { topResults[rank] = sortedResults[rank]; topResults[rank].rank = rank + 1; diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilder.java index fee3c7b5e9cf0..8f3ed15037c08 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilder.java @@ -47,7 +47,7 @@ public class RRFRankBuilder extends RankBuilder { }); static { - PARSER.declareInt(optionalConstructorArg(), WINDOW_SIZE_FIELD); + PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); PARSER.declareInt(optionalConstructorArg(), RANK_CONSTANT_FIELD); } @@ -65,8 +65,8 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio private final int rankConstant; - public RRFRankBuilder(int windowSize, int rankConstant) { - super(windowSize); + public RRFRankBuilder(int rankWindowSize, int rankConstant) { + super(rankWindowSize); this.rankConstant = rankConstant; } @@ -94,14 +94,13 @@ public int rankConstant() { return rankConstant; } - @Override public QueryPhaseRankShardContext buildQueryPhaseShardContext(List queries, int from) { - return new RRFQueryPhaseRankShardContext(queries, from, windowSize(), rankConstant); + return new RRFQueryPhaseRankShardContext(queries, rankWindowSize(), rankConstant); } @Override public QueryPhaseRankCoordinatorContext buildQueryPhaseCoordinatorContext(int size, int from) { - return new RRFQueryPhaseRankCoordinatorContext(size, from, windowSize(), rankConstant); + return new RRFQueryPhaseRankCoordinatorContext(size, from, rankWindowSize(), rankConstant); } @Override diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index ea8255f73af88..077c933fa9add 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -38,7 +38,7 @@ public final class RRFRetrieverBuilder extends RetrieverBuilder { public static final NodeFeature RRF_RETRIEVER_SUPPORTED = new NodeFeature("rrf_retriever_supported"); public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers"); - public static final ParseField WINDOW_SIZE_FIELD = new ParseField("window_size"); + public static final ParseField RANK_WINDOW_SIZE_FIELD = new ParseField("rank_window_size"); public static final ParseField RANK_CONSTANT_FIELD = new ParseField("rank_constant"); public static final ObjectParser PARSER = new ObjectParser<>( @@ -54,7 +54,7 @@ public final class RRFRetrieverBuilder extends RetrieverBuilder { p.nextToken(); return retrieverBuilder; }, RETRIEVERS_FIELD); - PARSER.declareInt((r, v) -> r.windowSize = v, WINDOW_SIZE_FIELD); + PARSER.declareInt((r, v) -> r.rankWindowSize = v, RANK_WINDOW_SIZE_FIELD); PARSER.declareInt((r, v) -> r.rankConstant = v, RANK_CONSTANT_FIELD); RetrieverBuilder.declareBaseParserFields(NAME, PARSER); @@ -71,7 +71,7 @@ public static RRFRetrieverBuilder fromXContent(XContentParser 
parser, RetrieverP } List retrieverBuilders = Collections.emptyList(); - int windowSize = RRFRankBuilder.DEFAULT_WINDOW_SIZE; + int rankWindowSize = RRFRankBuilder.DEFAULT_WINDOW_SIZE; int rankConstant = RRFRankBuilder.DEFAULT_RANK_CONSTANT; @Override @@ -88,7 +88,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder retrieverBuilder.extractToSearchSourceBuilder(searchSourceBuilder, true); } - searchSourceBuilder.rankBuilder(new RRFRankBuilder(windowSize, rankConstant)); + searchSourceBuilder.rankBuilder(new RRFRankBuilder(rankWindowSize, rankConstant)); } // ---- FOR TESTING XCONTENT PARSING ---- @@ -113,21 +113,21 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.endArray(); } - builder.field(WINDOW_SIZE_FIELD.getPreferredName(), windowSize); + builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); builder.field(RANK_CONSTANT_FIELD.getPreferredName(), rankConstant); } @Override public boolean doEquals(Object o) { RRFRetrieverBuilder that = (RRFRetrieverBuilder) o; - return windowSize == that.windowSize + return rankWindowSize == that.rankWindowSize && rankConstant == that.rankConstant && Objects.equals(retrieverBuilders, that.retrieverBuilders); } @Override public int doHashCode() { - return Objects.hash(retrieverBuilders, windowSize, rankConstant); + return Objects.hash(retrieverBuilders, rankWindowSize, rankConstant); } // ---- END FOR TESTING ---- diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilderTests.java index 001857385c5de..7f251416c0a44 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankBuilderTests.java @@ -29,9 +29,9 @@ protected RRFRankBuilder createTestInstance() { @Override protected RRFRankBuilder mutateInstance(RRFRankBuilder instance) throws IOException { if (randomBoolean()) { - return new RRFRankBuilder(instance.windowSize(), instance.rankConstant() == 1 ? 2 : instance.rankConstant() - 1); + return new RRFRankBuilder(instance.rankWindowSize(), instance.rankConstant() == 1 ? 2 : instance.rankConstant() - 1); } else { - return new RRFRankBuilder(instance.windowSize() == 0 ? 1 : instance.windowSize() - 1, instance.rankConstant()); + return new RRFRankBuilder(instance.rankWindowSize() == 0 ? 
1 : instance.rankWindowSize() - 1, instance.rankConstant()); } } @@ -61,7 +61,7 @@ public void testCreateRankContexts() { List queries = List.of(new TermQuery(new Term("field0", "test0")), new TermQuery(new Term("field1", "test1"))); QueryPhaseRankShardContext queryPhaseRankShardContext = rrfRankBuilder.buildQueryPhaseShardContext(queries, randomInt()); assertEquals(queries, queryPhaseRankShardContext.queries()); - assertEquals(rrfRankBuilder.windowSize(), queryPhaseRankShardContext.windowSize()); + assertEquals(rrfRankBuilder.rankWindowSize(), queryPhaseRankShardContext.rankWindowSize()); assertNotNull(rrfRankBuilder.buildQueryPhaseCoordinatorContext(randomInt(), randomInt())); } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankContextTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankContextTests.java index 50aa1d257d214..61859e280acdf 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankContextTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRankContextTests.java @@ -35,7 +35,7 @@ private void assertRDEquals(RRFRankDoc rd0, RRFRankDoc rd1) { } public void testShardCombine() { - RRFQueryPhaseRankShardContext context = new RRFQueryPhaseRankShardContext(null, 0, 10, 1); + RRFQueryPhaseRankShardContext context = new RRFQueryPhaseRankShardContext(null, 10, 1); List topDocs = List.of( new TopDocs( null, @@ -266,7 +266,7 @@ public void testCoordinatorRank() { } public void testShardTieBreaker() { - RRFQueryPhaseRankShardContext context = new RRFQueryPhaseRankShardContext(null, 0, 10, 1); + RRFQueryPhaseRankShardContext context = new RRFQueryPhaseRankShardContext(null, 10, 1); List topDocs = List.of( new TopDocs(null, new ScoreDoc[] { new ScoreDoc(1, 10.0f, -1), new ScoreDoc(2, 9.0f, -1) }), diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java index d63e8a14b59d5..330c936327b81 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderParsingTests.java @@ -32,7 +32,7 @@ public static RRFRetrieverBuilder createRandomRRFRetrieverBuilder() { RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder(); if (randomBoolean()) { - rrfRetrieverBuilder.windowSize = randomIntBetween(1, 10000); + rrfRetrieverBuilder.rankWindowSize = randomIntBetween(1, 10000); } if (randomBoolean()) { diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/100_rank_rrf.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/100_rank_rrf.yml index e55a1897eb701..c9eaa01616175 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/100_rank_rrf.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/100_rank_rrf.yml @@ -75,7 +75,7 @@ setup: text: term rank: rrf: - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -121,7 +121,7 @@ setup: ] rank: rrf: - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -169,7 +169,7 @@ setup: ] rank: rrf: - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -189,3 +189,41 @@ setup: - match: { hits.hits.2._rank: 3 } - match: { 
hits.hits.2.fields.text.0: "other" } - match: { hits.hits.2.fields.keyword.0: "other" } + + +--- +"RRF rank should fail if size > rank_window_size": + + - do: + catch: "/\\[rank\\] requires \\[rank_window_size: 2\\] be greater than or equal to \\[size: 10\\]/" + search: + index: test + body: + track_total_hits: true + fields: [ "text", "keyword" ] + knn: + field: vector + query_vector: [ 0.0 ] + k: 3 + num_candidates: 3 + sub_searches: [ + { + "query": { + "term": { + "text": "term" + } + } + }, + { + "query": { + "match": { + "keyword": "keyword" + } + } + } + ] + rank: + rrf: + rank_window_size: 2 + rank_constant: 1 + size: 10 diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/150_rank_rrf_pagination.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/150_rank_rrf_pagination.yml new file mode 100644 index 0000000000000..1c950be5bfbf9 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/150_rank_rrf_pagination.yml @@ -0,0 +1,1055 @@ +setup: + - skip: + version: ' - 8.14.99' + reason: 'pagination for rrf was added in 8.15' + + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + number_val: + type: keyword + char_val: + type: keyword + + - do: + index: + index: test + id: 1 + body: + number_val: "1" + char_val: "A" + + - do: + index: + index: test + id: 2 + body: + number_val: "2" + char_val: "B" + + - do: + index: + index: test + id: 3 + body: + number_val: "3" + char_val: "C" + + - do: + index: + index: test + id: 4 + body: + number_val: "4" + char_val: "D" + + - do: + index: + index: test + id: 5 + body: + number_val: "5" + char_val: "E" + + - do: + indices.refresh: {} + +--- +"Standard pagination within rank_window_size": + # this test retrieves the same results from two queries, and applies a simple pagination skipping the first result + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "1", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "2", + boost: 9.0 + } + } + }, + { + term: { + number_val: { + value: "3", + boost: 8.0 + } + } + }, + { + term: { + number_val: { + value: "4", + boost: 7.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "A", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "D", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from : 1 + size : 10 + + - match: { hits.total.value : 4 } + - length: { hits.hits : 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0._rank: 2 } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1._rank: 3 } + - match: { hits.hits.2._id: "4" } + - match: { hits.hits.2._rank: 4 } + +--- +"Standard pagination outside rank_window_size": + # in this example, from starts *after* rank_window_size so, we expect 0 results to be returned + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + 
term: { + number_val: { + value: "1", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "2", + boost: 9.0 + } + } + }, + { + term: { + number_val: { + value: "3", + boost: 8.0 + } + } + }, + { + term: { + number_val: { + value: "4", + boost: 7.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "A", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "D", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 2 + rank_constant: 10 + from : 10 + size : 2 + + - match: { hits.total.value : 4 } + - length: { hits.hits : 0 } + +--- +"Standard pagination partially outside rank_window_size": + # in this example we have that from starts *within* rank_window_size, but "from + size" goes over + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "1", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "2", + boost: 9.0 + } + } + }, + { + term: { + number_val: { + value: "3", + boost: 8.0 + } + } + }, + { + term: { + number_val: { + value: "4", + boost: 7.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "A", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "D", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 3 + rank_constant: 10 + from : 2 + size : 2 + + - match: { hits.total.value : 4 } + - length: { hits.hits : 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0._rank: 3 } + + +--- +"Pagination within interleaved results": + # perform two searches with different "from" parameter, ensuring that results are consistent + # rank_window_size covers the entire result set for both queries, so pagination should be consistent + # queryA has a result set of [1, 2, 3, 4] and + # queryB has a result set of [4, 3, 1, 2] + # so for rank_constant=10, the expected order is [1, 4, 3, 2] + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "1", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "2", + boost: 9.0 + } + } + }, + { + term: { + number_val: { + value: "3", + boost: 8.0 + } + } + }, + { + term: { + number_val: { + value: "4", + boost: 7.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from : 0 + size : 2 + + - match: { hits.total.value : 4 } + - length: { hits.hits : 2 } + - 
match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._rank: 1 } + - match: { hits.hits.1._id: "4" } + - match: { hits.hits.1._rank: 2 } + + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [1, 2, 3, 4] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "1", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "2", + boost: 9.0 + } + } + }, + { + term: { + number_val: { + value: "3", + boost: 8.0 + } + } + }, + { + term: { + number_val: { + value: "4", + boost: 7.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from : 2 + size : 2 + + - match: { hits.total.value : 4 } + - length: { hits.hits : 2 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0._rank: 3 } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._rank: 4 } + +--- +"Pagination within interleaved results, different result set sizes, rank_window_size covering all results": + # perform multiple searches with different "from" parameter, ensuring that results are consistent + # rank_window_size covers the entire result set for both queries, so pagination should be consistent + # queryA has a result set of [5, 1] and + # queryB has a result set of [4, 3, 1, 2] + # so for rank_constant=10, the expected order is [1, 5, 4, 3, 2] + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [5, 1] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "5", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "1", + boost: 9.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from : 0 + size : 2 + + - match: { hits.total.value : 5 } + - length: { hits.hits : 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._rank: 1 } + - match: { hits.hits.1._id: "5" } + - match: { hits.hits.1._rank: 2 } + + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [5, 1] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "5", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "1", + boost: 9.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + 
} + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from : 2 + size : 2 + + - match: { hits.total.value : 5 } + - length: { hits.hits : 2 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._rank: 3 } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1._rank: 4 } + + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [5, 1] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "5", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "1", + boost: 9.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 10 + rank_constant: 10 + from: 4 + size: 2 + + - match: { hits.total.value: 5 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0._rank: 5 } + + +--- +"Pagination within interleaved results, different result set sizes, rank_window_size not covering all results": + # perform multiple searches with different "from" parameter, ensuring that results are consistent + # rank_window_size does not cover the entire result set for both queries, so the results should be different + # from the test above. More specifically, we'd get to collect 2 results from each query, so we'd have: + # queryA has a result set of [5, 1] and + # queryB has a result set of [4, 3] + # so for rank_constant=10, the expected order is [5, 4, 1, 3], + # and the rank_window_size-sized result set that we'd paginate over is [5, 4] + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [5, 1] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "5", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "1", + boost: 9.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { + char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 2 + rank_constant: 10 + from : 0 + size : 2 + + - match: { hits.total.value : 5 } + - length: { hits.hits : 2 } + - match: { hits.hits.0._id: "5" } + - match: { hits.hits.0._rank: 1 } + - match: { hits.hits.1._id: "4" } + - match: { hits.hits.1._rank: 2 } + + - do: + search: + index: test + body: + track_total_hits: true + sub_searches: [ + { + # this should clause would generate the result set [5, 1] + "query": { + bool: { + should: [ + { + term: { + number_val: { + value: "5", + boost: 10.0 + } + } + }, + { + term: { + number_val: { + value: "1", + boost: 9.0 + } + } + } + ] + } + } + + }, + { + # this should clause would generate the result set [4, 3, 1, 2] + "query": { + bool: { + should: [ + { + term: { + char_val: { + value: "D", + boost: 10.0 + } + } + }, + { + term: { + char_val: { + value: "C", + boost: 9.0 + } + } + }, + { + term: { 
+ char_val: { + value: "A", + boost: 8.0 + } + } + }, + { + term: { + char_val: { + value: "B", + boost: 7.0 + } + } + } + ] + } + } + + } + ] + rank: + rrf: + rank_window_size: 2 + rank_constant: 10 + from : 2 + size : 2 + + - match: { hits.total.value : 5 } + - length: { hits.hits : 0 } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/200_rank_rrf_script.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/200_rank_rrf_script.yml index de5b29b21da72..0583e6d7ae51a 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/200_rank_rrf_script.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/200_rank_rrf_script.yml @@ -124,7 +124,7 @@ setup: ] rank: rrf: - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 5 @@ -189,7 +189,7 @@ setup: ] rank: rrf: - window_size: 3 + rank_window_size: 3 rank_constant: 1 size: 1 @@ -267,7 +267,7 @@ setup: ] rank: rrf: - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 5 diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/300_rrf_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/300_rrf_retriever.yml index 1387c37349cd4..d3d45ef2b18e8 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/300_rrf_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/300_rrf_retriever.yml @@ -86,7 +86,7 @@ setup: } } ] - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -136,7 +136,7 @@ setup: } } ] - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -191,7 +191,7 @@ setup: } } ] - window_size: 100 + rank_window_size: 100 rank_constant: 1 size: 10 @@ -260,7 +260,7 @@ setup: } } ] - window_size: 2 + rank_window_size: 2 rank_constant: 1 - match: { hits.total.value : 3 } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/400_rrf_retriever_script.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/400_rrf_retriever_script.yml index 2c2b59f306ee3..520389d51b737 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/400_rrf_retriever_script.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/400_rrf_retriever_script.yml @@ -138,7 +138,7 @@ setup: } } ] - window_size: 100 + rank_window_size: 100 rank_constant: 1 aggs: sums: @@ -207,7 +207,7 @@ setup: } } ] - window_size: 3 + rank_window_size: 3 rank_constant: 1 aggs: sums: @@ -308,7 +308,7 @@ setup: } } ] - window_size: 100 + rank_window_size: 100 rank_constant: 1 aggs: sums: From 4ef8b3825ec40051ead58d2c6180d6b80be060be Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Thu, 25 Apr 2024 10:27:32 -0700 Subject: [PATCH 7/8] Revert "Format default values of IP ranges to match other range bound" (#107910) This reverts commit acb5139 (a part of #107081). This commit impacts search behaviour for IP range fields and so needs to be reverted. 
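
For reference, a minimal sketch of the behaviour this revert restores, based on
the 20_synthetic_source.yml expectation updated below (the request shape is
illustrative and assumes the indexed document leaves the lower bound null, as
in the reverted change's tests):

    PUT test/_doc/7
    { "ip_range": { "gte": null, "lte": "10.10.10.10" } }

    # With acb5139 applied, a null lower bound was formatted to match the
    # address family of the other bound, so synthetic source returned:
    #   "ip_range": { "gte": "0.0.0.0", "lte": "10.10.10.10" }
    #
    # After this revert, a null bound defaults to the range type's
    # minimum/maximum value again, which for IP ranges is always IPv6:
    #   "ip_range": { "gte": "::", "lte": "10.10.10.10" }

The reverted commit kept default bounds in the same address family as the
provided bound, but that also changed search behaviour for IP range fields,
hence the revert.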
--- .../test/range/20_synthetic_source.yml | 2 +- .../index/mapper/RangeFieldMapper.java | 112 ++++++++---------- .../elasticsearch/index/mapper/RangeType.java | 29 ----- .../index/mapper/IpRangeFieldMapperTests.java | 31 ----- .../index/mapper/RangeFieldMapperTests.java | 4 +- 5 files changed, 54 insertions(+), 124 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/range/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/range/20_synthetic_source.yml index 60c61ddbb698e..3551d022c2f4a 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/range/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/range/20_synthetic_source.yml @@ -515,7 +515,7 @@ setup: id: "7" - match: _source: - ip_range: { "gte": "0.0.0.0", "lte": "10.10.10.10" } + ip_range: { "gte": "::", "lte": "10.10.10.10" } - do: get: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java index 885e0c9f2642d..f84a1b540a2be 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java @@ -381,76 +381,66 @@ protected boolean supportsParsingObject() { @Override protected void parseCreateField(DocumentParserContext context) throws IOException { + Range range; XContentParser parser = context.parser(); - if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { - return; - } - - Range range = parseRange(parser); - context.doc().addAll(fieldType().rangeType.createFields(context, name(), range, index, hasDocValues, store)); - - if (hasDocValues == false && (index || store)) { - context.addToFieldNames(fieldType().name()); - } - } - - private Range parseRange(XContentParser parser) throws IOException { final XContentParser.Token start = parser.currentToken(); - if (fieldType().rangeType == RangeType.IP && start == XContentParser.Token.VALUE_STRING) { - return parseIpRangeFromCidr(parser); - } - - if (start != XContentParser.Token.START_OBJECT) { + if (start == XContentParser.Token.VALUE_NULL) { + return; + } else if (start == XContentParser.Token.START_OBJECT) { + RangeFieldType fieldType = fieldType(); + RangeType rangeType = fieldType.rangeType; + String fieldName = null; + Object from = rangeType.minValue(); + Object to = rangeType.maxValue(); + boolean includeFrom = DEFAULT_INCLUDE_LOWER; + boolean includeTo = DEFAULT_INCLUDE_UPPER; + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else { + if (fieldName.equals(GT_FIELD.getPreferredName())) { + includeFrom = false; + if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { + from = rangeType.parseFrom(fieldType, parser, coerce.value(), includeFrom); + } + } else if (fieldName.equals(GTE_FIELD.getPreferredName())) { + includeFrom = true; + if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { + from = rangeType.parseFrom(fieldType, parser, coerce.value(), includeFrom); + } + } else if (fieldName.equals(LT_FIELD.getPreferredName())) { + includeTo = false; + if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { + to = rangeType.parseTo(fieldType, parser, coerce.value(), includeTo); + } + } else if (fieldName.equals(LTE_FIELD.getPreferredName())) { + includeTo = true; + if 
(parser.currentToken() != XContentParser.Token.VALUE_NULL) { + to = rangeType.parseTo(fieldType, parser, coerce.value(), includeTo); + } + } else { + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + name() + "], with unknown parameter [" + fieldName + "]" + ); + } + } + } + range = new Range(rangeType, from, to, includeFrom, includeTo); + } else if (fieldType().rangeType == RangeType.IP && start == XContentParser.Token.VALUE_STRING) { + range = parseIpRangeFromCidr(parser); + } else { throw new DocumentParsingException( parser.getTokenLocation(), "error parsing field [" + name() + "], expected an object but got " + parser.currentName() ); } + context.doc().addAll(fieldType().rangeType.createFields(context, name(), range, index, hasDocValues, store)); - RangeFieldType fieldType = fieldType(); - RangeType rangeType = fieldType.rangeType; - String fieldName = null; - Object parsedFrom = null; - Object parsedTo = null; - boolean includeFrom = DEFAULT_INCLUDE_LOWER; - boolean includeTo = DEFAULT_INCLUDE_UPPER; - XContentParser.Token token; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } else { - if (fieldName.equals(GT_FIELD.getPreferredName())) { - includeFrom = false; - if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { - parsedFrom = rangeType.parseFrom(fieldType, parser, coerce.value(), includeFrom); - } - } else if (fieldName.equals(GTE_FIELD.getPreferredName())) { - includeFrom = true; - if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { - parsedFrom = rangeType.parseFrom(fieldType, parser, coerce.value(), includeFrom); - } - } else if (fieldName.equals(LT_FIELD.getPreferredName())) { - includeTo = false; - if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { - parsedTo = rangeType.parseTo(fieldType, parser, coerce.value(), includeTo); - } - } else if (fieldName.equals(LTE_FIELD.getPreferredName())) { - includeTo = true; - if (parser.currentToken() != XContentParser.Token.VALUE_NULL) { - parsedTo = rangeType.parseTo(fieldType, parser, coerce.value(), includeTo); - } - } else { - throw new DocumentParsingException( - parser.getTokenLocation(), - "error parsing field [" + name() + "], with unknown parameter [" + fieldName + "]" - ); - } - } + if (hasDocValues == false && (index || store)) { + context.addToFieldNames(fieldType().name()); } - Object from = parsedFrom != null ? parsedFrom : rangeType.defaultFrom(parsedTo); - Object to = parsedTo != null ? parsedTo : rangeType.defaultTo(parsedFrom); - - return new Range(rangeType, from, to, includeFrom, includeTo); } private static Range parseIpRangeFromCidr(final XContentParser parser) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RangeType.java b/server/src/main/java/org/elasticsearch/index/mapper/RangeType.java index 24a1eb869cf25..bb4fb1acc0b14 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/RangeType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/RangeType.java @@ -31,7 +31,6 @@ import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; -import java.net.Inet4Address; import java.net.InetAddress; import java.time.Instant; import java.time.ZoneId; @@ -65,26 +64,6 @@ public InetAddress parseTo(RangeFieldMapper.RangeFieldType fieldType, XContentPa return included ? 
address : nextDown(address); } - public Object defaultFrom(Object parsedTo) { - if (parsedTo == null) { - return minValue(); - } - - // Make sure that we keep the range inside the same address family. - // `minValue()` is always IPv6 so we need to adjust it. - return parsedTo instanceof Inet4Address ? InetAddressPoint.decode(new byte[4]) : minValue(); - } - - public Object defaultTo(Object parsedFrom) { - if (parsedFrom == null) { - return maxValue(); - } - - // Make sure that we keep the range inside the same address family. - // `maxValue()` is always IPv6 so we need to adjust it. - return parsedFrom instanceof Inet4Address ? InetAddressPoint.decode(new byte[] { -1, -1, -1, -1 }) : maxValue(); - } - @Override public InetAddress parseValue(Object value, boolean coerce, @Nullable DateMathParser dateMathParser) { if (value instanceof InetAddress) { @@ -865,14 +844,6 @@ public Object parseTo(RangeFieldMapper.RangeFieldType fieldType, XContentParser return included ? value : (Number) nextDown(value); } - public Object defaultFrom(Object parsedTo) { - return minValue(); - } - - public Object defaultTo(Object parsedFrom) { - return maxValue(); - } - public abstract Object minValue(); public abstract Object maxValue(); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpRangeFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpRangeFieldMapperTests.java index 279c9263c98a9..ddec4b8ca65e5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpRangeFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpRangeFieldMapperTests.java @@ -87,37 +87,6 @@ public void testStoreCidr() throws Exception { } } - @Override - public void testNullBounds() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> { - minimalMapping(b); - b.field("store", true); - })); - - ParsedDocument bothNull = mapper.parse(source(b -> b.startObject("field").nullField("gte").nullField("lte").endObject())); - assertThat(storedValue(bothNull), equalTo("[:: : ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]")); - - ParsedDocument onlyFromIPv4 = mapper.parse( - source(b -> b.startObject("field").field("gte", rangeValue()).nullField("lte").endObject()) - ); - assertThat(storedValue(onlyFromIPv4), equalTo("[192.168.1.7 : 255.255.255.255]")); - - ParsedDocument onlyToIPv4 = mapper.parse( - source(b -> b.startObject("field").nullField("gte").field("lte", rangeValue()).endObject()) - ); - assertThat(storedValue(onlyToIPv4), equalTo("[0.0.0.0 : 192.168.1.7]")); - - ParsedDocument onlyFromIPv6 = mapper.parse( - source(b -> b.startObject("field").field("gte", "2001:db8::").nullField("lte").endObject()) - ); - assertThat(storedValue(onlyFromIPv6), equalTo("[2001:db8:: : ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]")); - - ParsedDocument onlyToIPv6 = mapper.parse( - source(b -> b.startObject("field").nullField("gte").field("lte", "2001:db8::").endObject()) - ); - assertThat(storedValue(onlyToIPv6), equalTo("[:: : 2001:db8::]")); - } - @SuppressWarnings("unchecked") public void testValidSyntheticSource() throws IOException { CheckedConsumer mapping = b -> { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java index d87d8dbc2bb4e..54c2d93ab73fa 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java @@ -236,14 
+236,14 @@ private void assertNullBounds(CheckedConsumer toCh } } - protected static String storedValue(ParsedDocument doc) { + private static String storedValue(ParsedDocument doc) { assertEquals(3, doc.rootDoc().getFields("field").size()); List fields = doc.rootDoc().getFields("field"); IndexableField storedField = fields.get(2); return storedField.stringValue(); } - public void testNullBounds() throws IOException { + public final void testNullBounds() throws IOException { // null, null => min, max assertNullBounds(b -> b.startObject("field").nullField("gte").nullField("lte").endObject(), true, true); From e1d902d33ba3773123d651e2f60b51b0e9e95670 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Thu, 25 Apr 2024 10:31:27 -0700 Subject: [PATCH 8/8] Implement synthetic source support for annotated text field (#107735) This PR adds synthetic source support for annotated_text fields. Existing implementation for text is reused including test infrastructure so the majority of the change is moving and making things accessible. Contributes to #106460, #78744. --- docs/changelog/107735.yaml | 5 + docs/plugins/mapper-annotated-text.asciidoc | 161 +++++++++++--- .../mapping/fields/synthetic-source.asciidoc | 1 + .../src/main/java/module-info.java | 19 ++ .../AnnotatedTextFieldMapper.java | 52 ++++- .../index/mapper/annotatedtext/Features.java | 26 +++ ...lasticsearch.features.FeatureSpecification | 9 + .../AnnotatedTextFieldMapperTests.java | 25 ++- .../20_synthetic_source.yml | 197 +++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 2 +- .../index/mapper/TextFieldMapper.java | 46 ++-- .../index/mapper/KeywordFieldMapperTests.java | 110 +--------- .../index/mapper/TextFieldMapperTests.java | 136 +----------- .../KeywordFieldSyntheticSourceSupport.java | 126 +++++++++++ .../index/mapper/MapperTestCase.java | 2 +- ...xtFieldFamilySyntheticSourceTestSetup.java | 207 ++++++++++++++++++ 16 files changed, 824 insertions(+), 300 deletions(-) create mode 100644 docs/changelog/107735.yaml create mode 100644 plugins/mapper-annotated-text/src/main/java/module-info.java create mode 100644 plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java create mode 100644 plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification create mode 100644 plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml create mode 100644 test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java create mode 100644 test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java diff --git a/docs/changelog/107735.yaml b/docs/changelog/107735.yaml new file mode 100644 index 0000000000000..372cb59ba8b1f --- /dev/null +++ b/docs/changelog/107735.yaml @@ -0,0 +1,5 @@ +pr: 107735 +summary: Implement synthetic source support for annotated text field +area: Mapping +type: feature +issues: [] diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc index 14669d857817c..900eaa5e97a04 100644 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ b/docs/plugins/mapper-annotated-text.asciidoc @@ -6,7 +6,7 @@ experimental[] The mapper-annotated-text plugin provides the ability to index text that is a combination of free-text and special markup that is typically used to identify items of interest such as people or organisations (see NER or Named 
Entity Recognition -tools). +tools). The elasticsearch markup allows one or more additional tokens to be injected, unchanged, into the token @@ -18,7 +18,7 @@ include::install_remove.asciidoc[] [[mapper-annotated-text-usage]] ==== Using the `annotated-text` field -The `annotated-text` tokenizes text content as per the more common {ref}/text.html[`text`] field (see +The `annotated-text` tokenizes text content as per the more common {ref}/text.html[`text`] field (see "limitations" below) but also injects any marked-up annotation tokens directly into the search index: @@ -49,7 +49,7 @@ in the search index: -------------------------- GET my-index-000001/_analyze { - "field": "my_field", + "field": "my_field", "text":"Investors in [Apple](Apple+Inc.) rejoiced." } -------------------------- @@ -76,7 +76,7 @@ Response: "position": 1 }, { - "token": "Apple Inc.", <1> + "token": "Apple Inc.", <1> "start_offset": 13, "end_offset": 18, "type": "annotation", @@ -106,7 +106,7 @@ the token stream and at the same position (position 2) as the text token (`apple We can now perform searches for annotations using regular `term` queries that don't tokenize -the provided search values. Annotations are a more precise way of matching as can be seen +the provided search values. Annotations are a more precise way of matching as can be seen in this example where a search for `Beck` will not match `Jeff Beck` : [source,console] @@ -133,18 +133,119 @@ GET my-index-000001/_search } -------------------------- -<1> As well as tokenising the plain text into single words e.g. `beck`, here we +<1> As well as tokenising the plain text into single words e.g. `beck`, here we inject the single token value `Beck` at the same position as `beck` in the token stream. <2> Note annotations can inject multiple tokens at the same position - here we inject both the very specific value `Jeff Beck` and the broader term `Guitarist`. This enables broader positional queries e.g. finding mentions of a `Guitarist` near to `strat`. -<3> A benefit of searching with these carefully defined annotation tokens is that a query for +<3> A benefit of searching with these carefully defined annotation tokens is that a query for `Beck` will not match document 2 that contains the tokens `jeff`, `beck` and `Jeff Beck` -WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will +WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will cause the document to be rejected with a parse failure. In future we hope to have a use for the equals signs so wil actively reject documents that contain this today. +[[annotated-text-synthetic-source]] +===== Synthetic `_source` + +IMPORTANT: Synthetic `_source` is Generally Available only for TSDB indices +(indices that have `index.mode` set to `time_series`). For other indices +synthetic `_source` is in technical preview. Features in technical preview may +be changed or removed in a future release. Elastic will work to fix +any issues, but features in technical preview are not subject to the support SLA +of official GA features. + +`annotated_text` fields support {ref}/mapping-source-field.html#synthetic-source[synthetic `_source`] if they have +a {ref}/keyword.html#keyword-synthetic-source[`keyword`] sub-field that supports synthetic +`_source` or if the `text` field sets `store` to `true`. Either way, it may +not have {ref}/copy-to.html[`copy_to`]. + +If using a sub-`keyword` field then the values are sorted in the same way as +a `keyword` field's values are sorted. 
By default, that means sorted with +duplicates removed. So: +[source,console,id=synthetic-source-text-example-default] +---- +PUT idx +{ + "mappings": { + "_source": { "mode": "synthetic" }, + "properties": { + "text": { + "type": "annotated_text", + "fields": { + "raw": { + "type": "keyword" + } + } + } + } + } +} +PUT idx/_doc/1 +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +---- +// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/] + +Will become: +[source,console-result] +---- +{ + "text": [ + "jumped over the lazy dog", + "the quick brown fox" + ] +} +---- +// TEST[s/^/{"_source":/ s/\n$/}/] + +NOTE: Reordering text fields can have an effect on {ref}/query-dsl-match-query-phrase.html[phrase] +and {ref}/span-queries.html[span] queries. See the discussion about {ref}/position-increment-gap.html[`position_increment_gap`] for more detail. You +can avoid this by making sure the `slop` parameter on the phrase queries +is lower than the `position_increment_gap`. This is the default. + +If the `annotated_text` field sets `store` to true then order and duplicates +are preserved. +[source,console,id=synthetic-source-text-example-stored] +---- +PUT idx +{ + "mappings": { + "_source": { "mode": "synthetic" }, + "properties": { + "text": { "type": "annotated_text", "store": true } + } + } +} +PUT idx/_doc/1 +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +---- +// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/] + +Will become: +[source,console-result] +---- +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +---- +// TEST[s/^/{"_source":/ s/\n$/}/] + [[mapper-annotated-text-tips]] ==== Data modelling tips @@ -153,13 +254,13 @@ the equals signs so wil actively reject documents that contain this today. Annotations are normally a way of weaving structured information into unstructured text for higher-precision search. -`Entity resolution` is a form of document enrichment undertaken by specialist software or people +`Entity resolution` is a form of document enrichment undertaken by specialist software or people where references to entities in a document are disambiguated by attaching a canonical ID. The ID is used to resolve any number of aliases or distinguish between people with the -same name. The hyperlinks connecting Wikipedia's articles are a good example of resolved -entity IDs woven into text. +same name. The hyperlinks connecting Wikipedia's articles are a good example of resolved +entity IDs woven into text. -These IDs can be embedded as annotations in an annotated_text field but it often makes +These IDs can be embedded as annotations in an annotated_text field but it often makes sense to include them in dedicated structured fields to support discovery via aggregations: [source,console] @@ -214,20 +315,20 @@ GET my-index-000001/_search -------------------------- <1> Note the `my_twitter_handles` contains a list of the annotation values -also used in the unstructured text. (Note the annotated_text syntax requires escaping). -By repeating the annotation values in a structured field this application has ensured that -the tokens discovered in the structured field can be used for search and highlighting -in the unstructured field. +also used in the unstructured text. (Note the annotated_text syntax requires escaping). 
+By repeating the annotation values in a structured field this application has ensured that +the tokens discovered in the structured field can be used for search and highlighting +in the unstructured field. <2> In this example we search for documents that talk about components of the elastic stack <3> We use the `my_twitter_handles` field here to discover people who are significantly associated with the elastic stack. ===== Avoiding over-matching annotations -By design, the regular text tokens and the annotation tokens co-exist in the same indexed +By design, the regular text tokens and the annotation tokens co-exist in the same indexed field but in rare cases this can lead to some over-matching. The value of an annotation often denotes a _named entity_ (a person, place or company). -The tokens for these named entities are inserted untokenized, and differ from typical text +The tokens for these named entities are inserted untokenized, and differ from typical text tokens because they are normally: * Mixed case e.g. `Madonna` @@ -235,19 +336,19 @@ tokens because they are normally: * Can have punctuation or numbers e.g. `Apple Inc.` or `@kimchy` This means, for the most part, a search for a named entity in the annotated text field will -not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result -you can drill down to highlight uses in the text without "over matching" on any text tokens +not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result +you can drill down to highlight uses in the text without "over matching" on any text tokens like the word `apple` in this context: the apple was very juicy - -However, a problem arises if your named entity happens to be a single term and lower-case e.g. the + +However, a problem arises if your named entity happens to be a single term and lower-case e.g. the company `elastic`. In this case, a search on the annotated text field for the token `elastic` may match a text document such as this: they fired an elastic band -To avoid such false matches users should consider prefixing annotation values to ensure +To avoid such false matches users should consider prefixing annotation values to ensure they don't name clash with text tokens e.g. [elastic](Company_elastic) released version 7.0 of the elastic stack today @@ -273,7 +374,7 @@ GET my-index-000001/_search { "query": { "query_string": { - "query": "cats" + "query": "cats" } }, "highlight": { @@ -291,21 +392,21 @@ GET my-index-000001/_search The annotated highlighter is based on the `unified` highlighter and supports the same settings but does not use the `pre_tags` or `post_tags` parameters. Rather than using -html-like markup such as `cat` the annotated highlighter uses the same +html-like markup such as `cat` the annotated highlighter uses the same markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term` -is the key and the matched search term is the value e.g. +is the key and the matched search term is the value e.g. 
The [cat](_hit_term=cat) sat on the [mat](sku3578) -The annotated highlighter tries to be respectful of any existing markup in the original +The annotated highlighter tries to be respectful of any existing markup in the original text: -* If the search term matches exactly the location of an existing annotation then the +* If the search term matches exactly the location of an existing annotation then the `_hit_term` key is merged into the url-like syntax used in the `(...)` part of the -existing annotation. +existing annotation. * However, if the search term overlaps the span of an existing annotation it would break the markup formatting so the original annotation is removed in favour of a new annotation -with just the search hit information in the results. +with just the search hit information in the results. * Any non-overlapping annotations in the original text are preserved in highlighter selections diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index ec6f51f78eda5..21e98cd55bf3a 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -41,6 +41,7 @@ There are a couple of restrictions to be aware of: types: ** <> +** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] ** <> ** <> ** <> diff --git a/plugins/mapper-annotated-text/src/main/java/module-info.java b/plugins/mapper-annotated-text/src/main/java/module-info.java new file mode 100644 index 0000000000000..3aa8e46e2980c --- /dev/null +++ b/plugins/mapper-annotated-text/src/main/java/module-info.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +module org.elasticsearch.index.mapper.annotatedtext { + requires org.elasticsearch.base; + requires org.elasticsearch.server; + requires org.elasticsearch.xcontent; + requires org.apache.lucene.core; + requires org.apache.lucene.highlighter; + + // exports nothing + + provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.index.mapper.annotatedtext.Features; +} diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index fae2ab19aee39..6d2b83185d5b7 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -21,17 +21,22 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader; import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextParams; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.similarity.SimilarityProvider; +import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.Reader; @@ -41,6 +46,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -58,6 +64,8 @@ **/ public class AnnotatedTextFieldMapper extends FieldMapper { + public static final NodeFeature SYNTHETIC_SOURCE_SUPPORT = new NodeFeature("mapper.annotated_text.synthetic_source"); + public static final String CONTENT_TYPE = "annotated_text"; private static Builder builder(FieldMapper in) { @@ -114,7 +122,7 @@ protected Parameter[] getParameters() { meta }; } - private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) { + private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context, MultiFields multiFields) { TextSearchInfo tsi = new TextSearchInfo( fieldType, similarity.get(), @@ -126,12 +134,14 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder store.getValue(), tsi, context.isSourceSynthetic(), + TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields), meta.getValue() ); } @Override public AnnotatedTextFieldMapper build(MapperBuilderContext context) { + MultiFields multiFields = multiFieldsBuilder.build(this, context); FieldType fieldType = TextParams.buildFieldType(() -> true, store, indexOptions, norms, termVectors); if (fieldType.indexOptions() == IndexOptions.NONE) { throw new IllegalArgumentException("[" + 
CONTENT_TYPE + "] fields must be indexed"); @@ -146,8 +156,8 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) { return new AnnotatedTextFieldMapper( name(), fieldType, - buildFieldType(fieldType, context), - multiFieldsBuilder.build(this, context), + buildFieldType(fieldType, context, multiFields), + multiFields, copyTo, this ); @@ -472,15 +482,15 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th } public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType { - private AnnotatedTextFieldType( String name, boolean store, TextSearchInfo tsi, boolean isSyntheticSource, + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate, Map meta ) { - super(name, true, store, tsi, isSyntheticSource, null, meta, false, false); + super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false); } public AnnotatedTextFieldType(String name, Map meta) { @@ -544,4 +554,36 @@ protected String contentType() { public FieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this); } + + @Override + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + if (copyTo.copyToFields().isEmpty() != true) { + throw new IllegalArgumentException( + "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" + ); + } + if (fieldType.stored()) { + return new StringStoredFieldFieldLoader(name(), simpleName(), null) { + @Override + protected void write(XContentBuilder b, Object value) throws IOException { + b.value((String) value); + } + }; + } + + var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this); + if (kwd != null) { + return kwd.syntheticFieldLoader(simpleName()); + } + + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "field [%s] of type [%s] doesn't support synthetic source unless it is stored or has a sub-field of" + + " type [keyword] with doc values or stored and without a normalizer", + name(), + typeName() + ) + ); + } } diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java new file mode 100644 index 0000000000000..1c4bd22e88145 --- /dev/null +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper.annotatedtext; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +/** + * Provides features for annotated text mapper. 
+ */ +public class Features implements FeatureSpecification { + @Override + public Set getFeatures() { + return Set.of( + AnnotatedTextFieldMapper.SYNTHETIC_SOURCE_SUPPORT // Added in 8.15 + ); + } +} diff --git a/plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification new file mode 100644 index 0000000000000..a19d9deb9c522 --- /dev/null +++ b/plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -0,0 +1,9 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the Server Side Public License, v 1; you may not use this file except +# in compliance with, at your election, the Elastic License 2.0 or the Server +# Side Public License, v 1. +# + +org.elasticsearch.index.mapper.annotatedtext.Features diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java index 9f1d063433d88..3b27cdb132851 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -29,6 +30,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.LowercaseNormalizer; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; @@ -38,6 +40,7 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.TextFieldFamilySyntheticSourceTestSetup; import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.ToXContent; @@ -54,6 +57,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Function; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -144,7 +148,8 @@ public TokenStream create(TokenStream tokenStream) { ) ); return IndexAnalyzers.of( - Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop) + Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop), + Map.of("lowercase", new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer())) ); } @@ -595,7 +600,23 @@ protected boolean 
supportsIgnoreMalformed() { @Override protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { - throw new AssumptionViolatedException("not supported"); + assumeFalse("ignore_malformed not supported", ignoreMalformed); + return TextFieldFamilySyntheticSourceTestSetup.syntheticSourceSupport("annotated_text", false); + } + + @Override + protected BlockReaderSupport getSupportedReaders(MapperService mapper, String loaderFieldName) { + return TextFieldFamilySyntheticSourceTestSetup.getSupportedReaders(mapper, loaderFieldName); + } + + @Override + protected Function loadBlockExpected(BlockReaderSupport blockReaderSupport, boolean columnReader) { + return TextFieldFamilySyntheticSourceTestSetup.loadBlockExpected(blockReaderSupport, columnReader); + } + + @Override + protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) { + TextFieldFamilySyntheticSourceTestSetup.validateRoundTripReader(syntheticSource, reader, roundTripReader); } @Override diff --git a/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml b/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml new file mode 100644 index 0000000000000..54a51e60f56df --- /dev/null +++ b/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml @@ -0,0 +1,197 @@ +--- +setup: + - requires: + cluster_features: ["mapper.annotated_text.synthetic_source"] + reason: introduced in 8.15.0 + +--- +stored annotated_text field: + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + annotated_text: + type: annotated_text + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + annotated_text: the quick brown fox + + - do: + search: + index: test + + - match: + hits.hits.0._source: + annotated_text: the quick brown fox + +--- +annotated_text field with keyword multi-field: + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + annotated_text: + type: annotated_text + fields: + keyword: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + annotated_text: the quick brown fox + + - do: + search: + index: test + + - match: + hits.hits.0._source: + annotated_text: the quick brown fox + +--- +multiple values in stored annotated_text field: + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + annotated_text: + type: annotated_text + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + annotated_text: ["world", "hello", "world"] + + - do: + search: + index: test + + - match: + hits.hits.0._source: + annotated_text: ["world", "hello", "world"] + +--- +multiple values in annotated_text field with keyword multi-field: + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + annotated_text: + type: annotated_text + fields: + keyword: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + annotated_text: ["world", "hello", "world"] + + - do: + search: + index: test + + - match: + hits.hits.0._source: + annotated_text: ["hello", "world"] + + +--- +multiple values in annotated_text field with stored keyword multi-field: + - do: + indices.create: + index: test + body: + mappings: + _source: 
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                fields:
+                  keyword:
+                    type: keyword
+                    store: true
+                    doc_values: false
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["world", "hello", "world"]
+
+---
+multiple values in stored annotated_text field with keyword multi-field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                store: true
+                fields:
+                  keyword:
+                    type: keyword
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["world", "hello", "world"]
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index bdf25307d3343..eeb452204091d 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -1026,7 +1026,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
         return syntheticFieldLoader(simpleName());
     }
 
-    SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) {
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) {
         if (hasScript()) {
             return SourceLoader.SyntheticFieldLoader.NOTHING;
         }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
index ef512e2bbd46b..57dd2fa0b920d 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -390,7 +390,7 @@ private TextFieldType buildFieldType(
             store.getValue(),
             tsi,
             context.isSourceSynthetic(),
-            syntheticSourceDelegate(fieldType, multiFields),
+            SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
             meta.getValue(),
             eagerGlobalOrdinals.getValue(),
             indexPhrases.getValue()
@@ -402,17 +402,6 @@ private TextFieldType buildFieldType(
         return ft;
     }
 
-    private static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(FieldType fieldType, MultiFields multiFields) {
-        if (fieldType.stored()) {
-            return null;
-        }
-        var kwd = getKeywordFieldMapperForSyntheticSource(multiFields);
-        if (kwd != null) {
-            return kwd.fieldType();
-        }
-        return null;
-    }
-
     private SubFieldInfo buildPrefixInfo(MapperBuilderContext context, FieldType fieldType, TextFieldType tft) {
         if (indexPrefixes.get() == null) {
             return null;
@@ -1094,7 +1083,7 @@ public boolean isSyntheticSource() {
             return isSyntheticSource;
         }
 
-        KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate() {
+        public KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate() {
             return syntheticSourceDelegate;
         }
     }
@@ -1473,7 +1462,7 @@ protected void write(XContentBuilder b, Object value) throws IOException {
             };
         }
 
-        var kwd = getKeywordFieldMapperForSyntheticSource(this);
+        var kwd = SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
         if (kwd != null) {
             return kwd.syntheticFieldLoader(simpleName());
         }
@@ -1489,16 +1478,29 @@ protected void write(XContentBuilder b, Object value) throws IOException {
         );
     }
 
-    private static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterable<Mapper> multiFields) {
-        for (Mapper sub : multiFields) {
-            if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
-                KeywordFieldMapper kwd = (KeywordFieldMapper) sub;
-                if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
-                    return kwd;
-                }
+    public static class SyntheticSourceHelper {
+        public static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(FieldType fieldType, MultiFields multiFields) {
+            if (fieldType.stored()) {
+                return null;
+            }
+            var kwd = getKeywordFieldMapperForSyntheticSource(multiFields);
+            if (kwd != null) {
+                return kwd.fieldType();
             }
+            return null;
         }
-        return null;
+
+        public static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterable<Mapper> multiFields) {
+            for (Mapper sub : multiFields) {
+                if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
+                    KeywordFieldMapper kwd = (KeywordFieldMapper) sub;
+                    if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
+                        return kwd;
+                    }
+                }
+            }
+
+            return null;
+        }
     }
 }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
index 70e375a89d5e7..4824bd337f5b0 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
@@ -25,7 +25,6 @@
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.analysis.AnalyzerScope;
@@ -45,14 +44,11 @@
 import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
-import java.util.stream.Collectors;
 
 import static java.util.Collections.singletonList;
 import static java.util.Collections.singletonMap;
@@ -658,7 +654,7 @@ protected Function<Object, Object> loadBlockExpected() {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
         assertFalse("keyword doesn't support ignore_malformed", ignoreMalformed);
-        return new KeywordSyntheticSourceSupport(
+        return new KeywordFieldSyntheticSourceSupport(
             randomBoolean() ? null : between(10, 100),
             randomBoolean(),
             usually() ? null : randomAlphaOfLength(2),
@@ -666,110 +662,6 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed)
         );
     }
 
-    static class KeywordSyntheticSourceSupport implements SyntheticSourceSupport {
-        private final Integer ignoreAbove;
-        private final boolean allIgnored;
-        private final boolean store;
-        private final boolean docValues;
-        private final String nullValue;
-        private final boolean exampleSortsUsingIgnoreAbove;
-
-        KeywordSyntheticSourceSupport(Integer ignoreAbove, boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) {
-            this.ignoreAbove = ignoreAbove;
-            this.allIgnored = ignoreAbove != null && rarely();
-            this.store = store;
-            this.nullValue = nullValue;
-            this.exampleSortsUsingIgnoreAbove = exampleSortsUsingIgnoreAbove;
-            this.docValues = store ? randomBoolean() : true;
-        }
-
-        @Override
-        public SyntheticSourceExample example(int maxValues) {
-            return example(maxValues, false);
-        }
-
-        public SyntheticSourceExample example(int maxValues, boolean loadBlockFromSource) {
-            if (randomBoolean()) {
-                Tuple<String, String> v = generateValue();
-                Object loadBlock = v.v2();
-                if (loadBlockFromSource == false && ignoreAbove != null && v.v2().length() > ignoreAbove) {
-                    loadBlock = null;
-                }
-                return new SyntheticSourceExample(v.v1(), v.v2(), loadBlock, this::mapping);
-            }
-            List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
-            List<String> in = values.stream().map(Tuple::v1).toList();
-            List<String> outPrimary = new ArrayList<>();
-            List<String> outExtraValues = new ArrayList<>();
-            values.stream().map(Tuple::v2).forEach(v -> {
-                if (exampleSortsUsingIgnoreAbove && ignoreAbove != null && v.length() > ignoreAbove) {
-                    outExtraValues.add(v);
-                } else {
-                    outPrimary.add(v);
-                }
-            });
-            List<String> outList = store ? outPrimary : new HashSet<>(outPrimary).stream().sorted().collect(Collectors.toList());
-            List<String> loadBlock;
-            if (loadBlockFromSource) {
-                // The block loader infrastructure will never return nulls. Just zap them all.
-                loadBlock = in.stream().filter(m -> m != null).toList();
-            } else if (docValues) {
-                loadBlock = new HashSet<>(outPrimary).stream().sorted().collect(Collectors.toList());
-            } else {
-                loadBlock = List.copyOf(outList);
-            }
-            Object loadBlockResult = loadBlock.size() == 1 ? loadBlock.get(0) : loadBlock;
-            outList.addAll(outExtraValues);
-            Object out = outList.size() == 1 ? outList.get(0) : outList;
-            return new SyntheticSourceExample(in, out, loadBlockResult, this::mapping);
-        }
-
-        private Tuple<String, String> generateValue() {
-            if (nullValue != null && randomBoolean()) {
-                return Tuple.tuple(null, nullValue);
-            }
-            int length = 5;
-            if (ignoreAbove != null && (allIgnored || randomBoolean())) {
-                length = ignoreAbove + 5;
-            }
-            String v = randomAlphaOfLength(length);
-            return Tuple.tuple(v, v);
-        }
-
-        private void mapping(XContentBuilder b) throws IOException {
-            b.field("type", "keyword");
-            if (nullValue != null) {
-                b.field("null_value", nullValue);
-            }
-            if (ignoreAbove != null) {
-                b.field("ignore_above", ignoreAbove);
-            }
-            if (store) {
-                b.field("store", true);
-            }
-            if (docValues == false) {
-                b.field("doc_values", false);
-            }
-        }
-
-        @Override
-        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
-            return List.of(
-                new SyntheticSourceInvalidExample(
-                    equalTo(
-                        "field [field] of type [keyword] doesn't support synthetic source because "
-                            + "it doesn't have doc values and isn't stored"
-                    ),
-                    b -> b.field("type", "keyword").field("doc_values", false)
-                ),
-                new SyntheticSourceInvalidExample(
-                    equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"),
-                    b -> b.field("type", "keyword").field("normalizer", "lowercase")
-                )
-            );
-        }
-    }
-
     @Override
     protected IngestScriptSupport ingestScriptSupport() {
         return new IngestScriptSupport() {
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
index 5d0c1c01ecdcf..50d15be2256ed 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
@@ -75,7 +75,6 @@
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
-import org.hamcrest.Matcher;
 import org.junit.AssumptionViolatedException;
 
 import java.io.IOException;
@@ -1178,120 +1177,12 @@ protected boolean supportsIgnoreMalformed() {
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
         assumeFalse("ignore_malformed not supported", ignoreMalformed);
-        boolean storeTextField = randomBoolean();
-        boolean storedKeywordField = storeTextField || randomBoolean();
-        boolean indexText = randomBoolean();
-        Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
-        KeywordFieldMapperTests.KeywordSyntheticSourceSupport keywordSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
-            ignoreAbove,
-            storedKeywordField,
-            null,
-            false == storeTextField
-        );
-        return new SyntheticSourceSupport() {
-            @Override
-            public SyntheticSourceExample example(int maxValues) {
-                if (storeTextField) {
-                    SyntheticSourceExample delegate = keywordSupport.example(maxValues, true);
-                    return new SyntheticSourceExample(
-                        delegate.inputValue(),
-                        delegate.expectedForSyntheticSource(),
-                        delegate.expectedForBlockLoader(),
-                        b -> {
-                            b.field("type", "text");
-                            b.field("store", true);
-                            if (indexText == false) {
-                                b.field("index", false);
-                            }
-                        }
-                    );
-                }
-                // We'll load from _source if ignore_above is defined, otherwise we load from the keyword field.
-                boolean loadingFromSource = ignoreAbove != null;
-                SyntheticSourceExample delegate = keywordSupport.example(maxValues, loadingFromSource);
-                return new SyntheticSourceExample(
-                    delegate.inputValue(),
-                    delegate.expectedForSyntheticSource(),
-                    delegate.expectedForBlockLoader(),
-                    b -> {
-                        b.field("type", "text");
-                        if (indexText == false) {
-                            b.field("index", false);
-                        }
-                        b.startObject("fields");
-                        {
-                            b.startObject(randomAlphaOfLength(4));
-                            delegate.mapping().accept(b);
-                            b.endObject();
-                        }
-                        b.endObject();
-                    }
-                );
-            }
-
-            @Override
-            public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
-                Matcher<String> err = equalTo(
-                    "field [field] of type [text] doesn't support synthetic source unless it is stored or"
-                        + " has a sub-field of type [keyword] with doc values or stored and without a normalizer"
-                );
-                return List.of(
-                    new SyntheticSourceInvalidExample(err, TextFieldMapperTests.this::minimalMapping),
-                    new SyntheticSourceInvalidExample(err, b -> {
-                        b.field("type", "text");
-                        b.startObject("fields");
-                        {
-                            b.startObject("l");
-                            b.field("type", "long");
-                            b.endObject();
-                        }
-                        b.endObject();
-                    }),
-                    new SyntheticSourceInvalidExample(err, b -> {
-                        b.field("type", "text");
-                        b.startObject("fields");
-                        {
-                            b.startObject("kwd");
-                            b.field("type", "keyword");
-                            b.field("normalizer", "lowercase");
-                            b.endObject();
-                        }
-                        b.endObject();
-                    }),
-                    new SyntheticSourceInvalidExample(err, b -> {
-                        b.field("type", "text");
-                        b.startObject("fields");
-                        {
-                            b.startObject("kwd");
-                            b.field("type", "keyword");
-                            b.field("doc_values", "false");
-                            b.endObject();
-                        }
-                        b.endObject();
-                    })
-                );
-            }
-        };
+        return TextFieldFamilySyntheticSourceTestSetup.syntheticSourceSupport("text", true);
     }
 
     @Override
     protected Function<Object, Object> loadBlockExpected(BlockReaderSupport blockReaderSupport, boolean columnReader) {
-        if (nullLoaderExpected(blockReaderSupport.mapper(), blockReaderSupport.loaderFieldName())) {
-            return null;
-        }
-        return v -> ((BytesRef) v).utf8ToString();
-    }
-
-    private boolean nullLoaderExpected(MapperService mapper, String fieldName) {
-        MappedFieldType type = mapper.fieldType(fieldName);
-        if (type instanceof TextFieldType t) {
-            if (t.isSyntheticSource() == false || t.canUseSyntheticSourceDelegateForQuerying() || t.isStored()) {
-                return false;
-            }
-            String parentField = mapper.mappingLookup().parentField(fieldName);
-            return parentField == null || nullLoaderExpected(mapper, parentField);
-        }
-        return false;
+        return TextFieldFamilySyntheticSourceTestSetup.loadBlockExpected(blockReaderSupport, columnReader);
     }
 
     @Override
@@ -1300,9 +1191,8 @@ protected IngestScriptSupport ingestScriptSupport() {
     }
 
     @Override
-    protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader)
-        throws IOException {
-        // Disabled because it currently fails
+    protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) {
+        TextFieldFamilySyntheticSourceTestSetup.validateRoundTripReader(syntheticSource, reader, roundTripReader);
     }
 
     public void testUnknownAnalyzerOnLegacyIndex() throws IOException {
@@ -1433,21 +1323,7 @@ public void testEmpty() throws Exception {
 
     @Override
     protected BlockReaderSupport getSupportedReaders(MapperService mapper, String loaderFieldName) {
-        MappedFieldType ft = mapper.fieldType(loaderFieldName);
-        String parentName = mapper.mappingLookup().parentField(ft.name());
-        if (parentName == null) {
-            TextFieldMapper.TextFieldType text = (TextFieldType) ft;
-            boolean supportsColumnAtATimeReader = text.syntheticSourceDelegate() != null
-                && text.syntheticSourceDelegate().hasDocValues()
-                && text.canUseSyntheticSourceDelegateForQuerying();
-            return new BlockReaderSupport(supportsColumnAtATimeReader, mapper, loaderFieldName);
-        }
-        MappedFieldType parent = mapper.fieldType(parentName);
-        if (false == parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
-            throw new UnsupportedOperationException();
-        }
-        KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent;
-        return new BlockReaderSupport(kwd.hasDocValues(), mapper, loaderFieldName);
+        return TextFieldFamilySyntheticSourceTestSetup.getSupportedReaders(mapper, loaderFieldName);
     }
 
     public void testBlockLoaderFromParentColumnReader() throws IOException {
@@ -1460,7 +1336,7 @@ public void testBlockLoaderParentFromRowStrideReader() throws IOException {
 
     private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSource) throws IOException {
         boolean storeParent = randomBoolean();
-        KeywordFieldMapperTests.KeywordSyntheticSourceSupport kwdSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
+        KeywordFieldSyntheticSourceSupport kwdSupport = new KeywordFieldSyntheticSourceSupport(
             null,
             storeParent,
             null,
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java
new file mode 100644
index 0000000000000..53ecb75c18d9a
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport {
+    private final Integer ignoreAbove;
+    private final boolean allIgnored;
+    private final boolean store;
+    private final boolean docValues;
+    private final String nullValue;
+    private final boolean exampleSortsUsingIgnoreAbove;
+
+    KeywordFieldSyntheticSourceSupport(Integer ignoreAbove, boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) {
+        this.ignoreAbove = ignoreAbove;
+        this.allIgnored = ignoreAbove != null && LuceneTestCase.rarely();
+        this.store = store;
+        this.nullValue = nullValue;
+        this.exampleSortsUsingIgnoreAbove = exampleSortsUsingIgnoreAbove;
+        this.docValues = store ? ESTestCase.randomBoolean() : true;
+    }
+
+    @Override
+    public MapperTestCase.SyntheticSourceExample example(int maxValues) {
+        return example(maxValues, false);
+    }
+
+    public MapperTestCase.SyntheticSourceExample example(int maxValues, boolean loadBlockFromSource) {
+        if (ESTestCase.randomBoolean()) {
+            Tuple<String, String> v = generateValue();
+            Object loadBlock = v.v2();
+            if (loadBlockFromSource == false && ignoreAbove != null && v.v2().length() > ignoreAbove) {
+                loadBlock = null;
+            }
+            return new MapperTestCase.SyntheticSourceExample(v.v1(), v.v2(), loadBlock, this::mapping);
+        }
+        List<Tuple<String, String>> values = ESTestCase.randomList(1, maxValues, this::generateValue);
+        List<String> in = values.stream().map(Tuple::v1).toList();
+        List<String> outPrimary = new ArrayList<>();
+        List<String> outExtraValues = new ArrayList<>();
+        values.stream().map(Tuple::v2).forEach(v -> {
+            if (exampleSortsUsingIgnoreAbove && ignoreAbove != null && v.length() > ignoreAbove) {
+                outExtraValues.add(v);
+            } else {
+                outPrimary.add(v);
+            }
+        });
+        List<String> outList = store ? outPrimary : new HashSet<>(outPrimary).stream().sorted().collect(Collectors.toList());
+        List<String> loadBlock;
+        if (loadBlockFromSource) {
+            // The block loader infrastructure will never return nulls. Just zap them all.
+            loadBlock = in.stream().filter(m -> m != null).toList();
+        } else if (docValues) {
+            loadBlock = new HashSet<>(outPrimary).stream().sorted().collect(Collectors.toList());
+        } else {
+            loadBlock = List.copyOf(outList);
+        }
+        Object loadBlockResult = loadBlock.size() == 1 ? loadBlock.get(0) : loadBlock;
+        outList.addAll(outExtraValues);
+        Object out = outList.size() == 1 ? outList.get(0) : outList;
+        return new MapperTestCase.SyntheticSourceExample(in, out, loadBlockResult, this::mapping);
+    }
+
+    private Tuple<String, String> generateValue() {
+        if (nullValue != null && ESTestCase.randomBoolean()) {
+            return Tuple.tuple(null, nullValue);
+        }
+        int length = 5;
+        if (ignoreAbove != null && (allIgnored || ESTestCase.randomBoolean())) {
+            length = ignoreAbove + 5;
+        }
+        String v = ESTestCase.randomAlphaOfLength(length);
+        return Tuple.tuple(v, v);
+    }
+
+    private void mapping(XContentBuilder b) throws IOException {
+        b.field("type", "keyword");
+        if (nullValue != null) {
+            b.field("null_value", nullValue);
+        }
+        if (ignoreAbove != null) {
+            b.field("ignore_above", ignoreAbove);
+        }
+        if (store) {
+            b.field("store", true);
+        }
+        if (docValues == false) {
+            b.field("doc_values", false);
+        }
+    }
+
+    @Override
+    public List<MapperTestCase.SyntheticSourceInvalidExample> invalidExample() throws IOException {
+        return List.of(
+            new MapperTestCase.SyntheticSourceInvalidExample(
+                equalTo(
+                    "field [field] of type [keyword] doesn't support synthetic source because "
+                        + "it doesn't have doc values and isn't stored"
+                ),
+                b -> b.field("type", "keyword").field("doc_values", false)
+            ),
+            new MapperTestCase.SyntheticSourceInvalidExample(
+                equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"),
+                b -> b.field("type", "keyword").field("normalizer", "lowercase")
+            )
+        );
+    }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
index fa0f0e1b95f54..097c23b96bb76 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
@@ -1286,7 +1286,7 @@ protected BlockReaderSupport getSupportedReaders(MapperService mapper, String lo
      * @param loaderFieldName the field name to use for loading the field
      */
     public record BlockReaderSupport(boolean columnAtATimeReader, boolean syntheticSource, MapperService mapper, String loaderFieldName) {
-        BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, String loaderFieldName) {
+        public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, String loaderFieldName) {
             this(columnAtATimeReader, true, mapper, loaderFieldName);
         }
 
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java
new file mode 100644
index 0000000000000..df4377adc3e35
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.util.BytesRef;
+import org.hamcrest.Matcher;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.function.Function;
+
+import static org.elasticsearch.test.ESTestCase.between;
+import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
+import static org.elasticsearch.test.ESTestCase.randomBoolean;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Provides functionality needed to test synthetic source support in text and text-like fields (e.g. "text", "annotated_text").
+ */
+public final class TextFieldFamilySyntheticSourceTestSetup {
+    public static MapperTestCase.SyntheticSourceSupport syntheticSourceSupport(String fieldType, boolean supportsCustomIndexConfiguration) {
+        return new TextFieldFamilySyntheticSourceSupport(fieldType, supportsCustomIndexConfiguration);
+    }
+
+    public static MapperTestCase.BlockReaderSupport getSupportedReaders(MapperService mapper, String loaderFieldName) {
+        MappedFieldType ft = mapper.fieldType(loaderFieldName);
+        String parentName = mapper.mappingLookup().parentField(ft.name());
+        if (parentName == null) {
+            TextFieldMapper.TextFieldType text = (TextFieldMapper.TextFieldType) ft;
+            boolean supportsColumnAtATimeReader = text.syntheticSourceDelegate() != null
+                && text.syntheticSourceDelegate().hasDocValues()
+                && text.canUseSyntheticSourceDelegateForQuerying();
+            return new MapperTestCase.BlockReaderSupport(supportsColumnAtATimeReader, mapper, loaderFieldName);
+        }
+        MappedFieldType parent = mapper.fieldType(parentName);
+        if (false == parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
+            throw new UnsupportedOperationException();
+        }
+        KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent;
+        return new MapperTestCase.BlockReaderSupport(kwd.hasDocValues(), mapper, loaderFieldName);
+    }
+
+    public static Function<Object, Object> loadBlockExpected(MapperTestCase.BlockReaderSupport blockReaderSupport, boolean columnReader) {
+        if (nullLoaderExpected(blockReaderSupport.mapper(), blockReaderSupport.loaderFieldName())) {
+            return null;
+        }
+        return v -> ((BytesRef) v).utf8ToString();
+    }
+
+    private static boolean nullLoaderExpected(MapperService mapper, String fieldName) {
+        MappedFieldType type = mapper.fieldType(fieldName);
+        if (type instanceof TextFieldMapper.TextFieldType t) {
+            if (t.isSyntheticSource() == false || t.canUseSyntheticSourceDelegateForQuerying() || t.isStored()) {
+                return false;
+            }
+            String parentField = mapper.mappingLookup().parentField(fieldName);
+            return parentField == null || nullLoaderExpected(mapper, parentField);
+        }
+        return false;
+    }
+
+    public static void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) {
+        // `reader` here is the reader of the original document, while `roundTripReader` reads the document
+        // created from synthetic source.
+        // This check fails when synthetic source is constructed using a keyword sub-field, because in that
+        // case the values are sorted (as they are read from doc values) while the original document's
+        // values are not.
+        //
+        // The check is therefore disabled.
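+        //
+        // Concretely (see the annotated_text yaml tests above): a multi-valued input of
+        // ["world", "hello", "world"] round-trips as ["hello", "world"] when the delegate
+        // is backed by doc values, so a reader-level comparison cannot pass.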
+    }
+
+    private static class TextFieldFamilySyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport {
+        private final String fieldType;
+        private final boolean storeTextField;
+        private final boolean storedKeywordField;
+        private final boolean indexText;
+        private final Integer ignoreAbove;
+        private final KeywordFieldSyntheticSourceSupport keywordSupport;
+
+        TextFieldFamilySyntheticSourceSupport(String fieldType, boolean supportsCustomIndexConfiguration) {
+            this.fieldType = fieldType;
+            this.storeTextField = randomBoolean();
+            this.storedKeywordField = storeTextField || randomBoolean();
+            this.indexText = supportsCustomIndexConfiguration ? randomBoolean() : true;
+            this.ignoreAbove = randomBoolean() ? null : between(10, 100);
+            this.keywordSupport = new KeywordFieldSyntheticSourceSupport(ignoreAbove, storedKeywordField, null, false == storeTextField);
+        }
+
+        @Override
+        public MapperTestCase.SyntheticSourceExample example(int maxValues) {
+            if (storeTextField) {
+                MapperTestCase.SyntheticSourceExample delegate = keywordSupport.example(maxValues, true);
+                return new MapperTestCase.SyntheticSourceExample(
+                    delegate.inputValue(),
+                    delegate.expectedForSyntheticSource(),
+                    delegate.expectedForBlockLoader(),
+                    b -> {
+                        b.field("type", fieldType);
+                        b.field("store", true);
+                        if (indexText == false) {
+                            b.field("index", false);
+                        }
+                    }
+                );
+            }
+            // We'll load from _source if ignore_above is defined, otherwise we load from the keyword field.
+            boolean loadingFromSource = ignoreAbove != null;
+            MapperTestCase.SyntheticSourceExample delegate = keywordSupport.example(maxValues, loadingFromSource);
+            return new MapperTestCase.SyntheticSourceExample(
+                delegate.inputValue(),
+                delegate.expectedForSyntheticSource(),
+                delegate.expectedForBlockLoader(),
+                b -> {
+                    b.field("type", fieldType);
+                    if (indexText == false) {
+                        b.field("index", false);
+                    }
+                    b.startObject("fields");
+                    {
+                        b.startObject(randomAlphaOfLength(4));
+                        delegate.mapping().accept(b);
+                        b.endObject();
+                    }
+                    b.endObject();
+                }
+            );
+        }
+
+        @Override
+        public List<MapperTestCase.SyntheticSourceInvalidExample> invalidExample() throws IOException {
+            Matcher<String> err = equalTo(
+                String.format(
+                    Locale.ROOT,
+                    "field [field] of type [%s] doesn't support synthetic source unless it is stored or"
+                        + " has a sub-field of type [keyword] with doc values or stored and without a normalizer",
+                    fieldType
+                )
+            );
+            return List.of(
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> b.field("type", fieldType)),
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> {
+                    b.field("type", fieldType);
+                    b.startObject("fields");
+                    {
+                        b.startObject("l");
+                        b.field("type", "long");
+                        b.endObject();
+                    }
+                    b.endObject();
+                }),
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> {
+                    b.field("type", fieldType);
+                    b.startObject("fields");
+                    {
+                        b.startObject("kwd");
+                        b.field("type", "keyword");
+                        b.field("normalizer", "lowercase");
+                        b.endObject();
+                    }
+                    b.endObject();
+                }),
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> {
+                    b.field("type", fieldType);
+                    b.startObject("fields");
+                    {
+                        b.startObject("kwd");
+                        b.field("type", "keyword");
+                        b.field("doc_values", "false");
+                        b.endObject();
+                    }
+                    b.endObject();
+                }),
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> {
+                    b.field("type", fieldType);
+                    b.field("store", "false");
+                    b.startObject("fields");
+                    {
+                        b.startObject("kwd");
+                        b.field("type", "keyword");
+                        b.field("doc_values", "false");
+                        b.endObject();
+                    }
+                    b.endObject();
+                }),
+                new MapperTestCase.SyntheticSourceInvalidExample(err, b -> {
+                    b.field("type", fieldType);
+                    b.startObject("fields");
+                    {
+                        b.startObject("kwd");
+                        b.field("type", "keyword");
+                        b.field("doc_values", "false");
+                        b.field("store", "false");
+                        b.endObject();
+                    }
+                    b.endObject();
+                })
+            );
+        }
+    }
+}
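
Taken together, the visibility changes in this patch (public KeywordFieldMapper#syntheticFieldLoader(String), public TextFieldType#syntheticSourceDelegate(), and the new public TextFieldMapper.SyntheticSourceHelper) are what allow text-like mappers and test setup living outside org.elasticsearch.index.mapper to reuse the keyword-delegate machinery. A minimal sketch of that consumption pattern from a hypothetical text-like mapper; the surrounding class and the exception message are assumptions for illustration, not part of this patch:

    // Sketch: a text-like FieldMapper delegating synthetic source loading to an
    // eligible keyword multi-field. FieldMapper iterates its multi-fields, so
    // `this` is a valid Iterable<Mapper> argument here, mirroring how
    // TextFieldMapper itself calls the helper above.
    @Override
    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
        // An eligible delegate is a keyword sub-field without a normalizer that
        // has doc values or is stored (see SyntheticSourceHelper).
        var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
        if (kwd != null) {
            // syntheticFieldLoader(String) is public after this change, so the keyword
            // sub-field can reconstruct this field's value across package boundaries.
            return kwd.syntheticFieldLoader(simpleName());
        }
        // Assumed error handling; the real mappers throw with a type-specific message.
        throw new IllegalArgumentException("field [" + name() + "] doesn't support synthetic source");
    }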